source: palm/trunk/SCRIPTS/palm_wdd

Last change on this file was 4843, checked in by raasch, 23 months ago

local namelist parameter added to switch off the module although the respective module namelist appears in the namelist file, further copyright updates

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 7.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#--------------------------------------------------------------------------------#
4# This file is part of the PALM model system.
5#
6# PALM is free software: you can redistribute it and/or modify it under the terms
7# of the GNU General Public License as published by the Free Software Foundation,
8# either version 3 of the License, or (at your option) any later version.
9#
10# PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License along with
15# PALM. If not, see <http://www.gnu.org/licenses/>.
16#
17# Copyright 1997-2021  Leibniz Universitaet Hannover
18#--------------------------------------------------------------------------------#
19#
20# Current revisions:
21# -----------------
22#
23#
24# Former revisions:
25# -----------------
26# $Id: palm_wdd 4843 2021-01-15 15:22:11Z banzhafs $
27# Modified header
28#
29# 2718 2018-01-02 08:49:38Z maronga
30# Corrected "Former revisions" section
31#
32# 2696 2017-12-14 17:12:51Z kanani
33# Change in file header (GPL part)
34#
35# 2421 2017-09-07 10:36:34Z maronga
36# Fixed display of job progress.
37#
38# 2416 2017-09-06 14:28:14Z maronga
39# Adapted for palmrun
40#
41# 1619 2015-07-13 06:53:19Z maronga
42#
43# 1618 2015-07-13 06:52:15Z maronga
44# Added steering via configuration file, to be placed in the home directory of the
45# remote host to be monitored.
46#
47# 1613 2015-07-08 14:53:29Z maronga
48# Bugfix: tooltip for queuing name did not show up on first update.
49# New: added context menu for showing the parameter file and the run control
50# output
51#
52# 1611 2015-07-07 12:23:22Z maronga
53# Initial revision
54#
55#
56# Description:
57# ------------
58# PALM watchdog client for monitoring batch jobs on a variety of hosts specified
59# by the user. The watchdog server requires python 2.7 or higher installed on
60# the host to be monitored.
61#
62# Instructions:
63# -------------
64# 1) Modify the header section of palm_wd
65# 2) Move .wd.olddata and .wd.newdata to your palm directory
66#    (e.g. /home/user/current_version/.wd.newdata etc.)
67# 3) Modify a copy of palm_wdd for each host to be monitored and move it to the
68#    respective hosts
69# 4) Start the client either from mrungui or from shell by "nohup palm_wd&"
70#
71# To do:
72# ------
73# 1) Add "Options", "Help" and "Manual"
74# 2) Move user settings to a configuration file
75#------------------------------------------------------------------------------!
76
77import ConfigParser
78import os
79import pwd
80from subprocess import check_output
81import sys
82
83
# Load the daemon settings from .wdd.config in the current working directory.
# Without that file none of the commands below can be built, so report the
# problem and terminate right away.
if not os.path.exists('.wdd.config'):
    print("Error. No configuration file .wdd.config found.")
    sys.exit()

config = ConfigParser.RawConfigParser()
config.read('.wdd.config')


def _setting(key):
    # Return option `key` of the [Settings] section with enclosing double
    # quotes removed.
    return config.get('Settings', key).strip('"')


cmd_readqueue      = _setting('readqueue')
cmd_tmpdir         = _setting('tmpdir')
cmd_canceljob      = _setting('canceljob')
cmd_checkjob       = _setting('checkjob')
cmd_realname_grep  = _setting('realname_grep')
cmd_starttime      = _setting('starttime')
cmd_starttime_grep = _setting('starttime_grep')


# Command line: <action> <data>; both arguments are required.
action = str(sys.argv[1])
data   = str(sys.argv[2])

# The queue command gets the login name of the invoking user as its argument.
# The same name is appended to the temporary directory without any separator,
# so the configured tmpdir value has to provide one itself if it is needed.
_user_name    = pwd.getpwuid(os.getuid())[0]
cmd_readqueue = cmd_readqueue + " " + _user_name
cmd_tmpdir    = cmd_tmpdir + _user_name
107
# reading queuing system
def ReadQueue(username):
   """Print one status line for every job reported by the queuing system.

   The queue is read with the module-level command `cmd_readqueue`. Each
   output line is split on blanks; field 0 is used as the job id and field 2
   as the job state. For every job the following is printed, separated by
   single blanks:

      <real name> <job id> <field 3> <state> <progress> <field 4> <start time>

   NOTE(review): the meaning of fields 3 and 4 depends on the configured
   queue command and cannot be derived from this file.

   Args:
      username: Not used. The user name is already part of `cmd_readqueue`.
         The parameter is kept so that existing callers keep working.

   Returns:
      None. The result is written to standard output.
   """

   # A failing queue command is treated like an empty queue.
   try:
      job_list = check_output(cmd_readqueue, shell=True).splitlines()
   except Exception:
      job_list = []

   for line in job_list:

      # Split on blanks and drop the empty strings caused by runs of blanks.
      fields = [field for field in line.split(" ") if field]

      # Lines without the five columns used below (blank lines, separators)
      # cannot describe a job. Skip them instead of aborting the whole report
      # with an IndexError.
      if len(fields) < 5:
         continue

      job_id = fields[0]
      job_state = fields[2]

      # Retrieve the real job name: second blank-separated token of the
      # check-job output line selected by grep.
      cmd_realname = cmd_checkjob + " " + job_id + "|grep " + cmd_realname_grep
      try:
         out = check_output(cmd_realname, shell=True)
         job_realname = out.split(" ")[1].rstrip()
      except Exception:
         job_realname = "error"

      # Only running jobs have a PROGRESS file. Its second line holds the
      # progress value as the fourth blank-separated token.
      job_progress = "0"
      if job_state == "Running":
         cmd_progress = "cat " + cmd_tmpdir + "/" + job_realname + "/PROGRESS"
         try:
            # The with-block closes the devnull handle again for every job.
            with open(os.devnull, 'w') as devnull:
               out = check_output(cmd_progress, shell=True, stderr=devnull)
            job_progress = out.splitlines()[1].split(" ")[3]
         except Exception:
            job_progress = "0"

      # Return the job data
      job_starttime = GetStartTime(job_id)
      print(" ".join([job_realname, job_id, fields[3], job_state,
                      job_progress, fields[4], job_starttime]))
160
161
# check details of specific job
def CheckJob(jobid):
   """Return the output of the configured check-job command for one job.

   The command line is `cmd_checkjob`, a blank and `jobid`, executed through
   the shell.

   NOTE(review): `jobid` is inserted into the shell command unquoted, so it
   must come from a trusted caller.

   Args:
      jobid: Job identifier as understood by the queuing system.

   Returns:
      The standard output of the command, or "No details available." if the
      command could not be run or exited with a non-zero status.
   """

   # Catch Exception rather than everything, so that KeyboardInterrupt and
   # SystemExit are no longer swallowed.
   try:
      return check_output(cmd_checkjob + " " + jobid, shell=True)
   except Exception:
      return "No details available."
174
175
# cancel a specific job
def CancelJob(jobid):
   """Run the configured cancel command for one job and return its output.

   The command line is `cmd_canceljob`, a blank and `jobid`, executed through
   the shell.

   NOTE(review): `jobid` is inserted into the shell command unquoted, so it
   must come from a trusted caller.

   Args:
      jobid: Job identifier as understood by the queuing system.

   Returns:
      The standard output of the command, or "Action failed." if the command
      could not be run or exited with a non-zero status.
   """

   # Catch Exception rather than everything, so that KeyboardInterrupt and
   # SystemExit are no longer swallowed.
   try:
      return check_output(cmd_canceljob + " " + jobid, shell=True)
   except Exception:
      return "Action failed."
188
189
# retrieve estimated start time of job
def GetStartTime(jobid):
   """Return the start time reported by the queuing system for one job.

   Runs `cmd_starttime` with `jobid` as argument, filters the output with
   grep for `cmd_starttime_grep` and returns the sixth whitespace-separated
   token of what remains.

   Args:
      jobid: Job identifier as understood by the queuing system.

   Returns:
      The extracted token, or "Action failed." if the command fails, grep
      finds no matching line, or the output has fewer than six tokens.
   """

   cmd_starttime_tmp = cmd_starttime + " " + jobid + "|grep \"" + cmd_starttime_grep + "\""

   # Catch Exception rather than everything, so that KeyboardInterrupt and
   # SystemExit are no longer swallowed. A too short output line (IndexError)
   # is still covered.
   try:
      return check_output(cmd_starttime_tmp, shell=True).split()[5]
   except Exception:
      return "Action failed."
202
203
def DoStopNow(jobid):
   """Create the file DO_STOP_NOW in the temporary directory of a job.

   The file is created with `touch` at `cmd_tmpdir`/`jobid`/DO_STOP_NOW.
   Presumably the running job checks for this file and stops when it
   appears; that part is not visible in this script.

   Args:
      jobid: Name of the job's subdirectory below `cmd_tmpdir`.

   Returns:
      None on success, "Action failed." if the file could not be created.
   """

   cmd_dostop = "touch " + cmd_tmpdir + "/" + jobid + "/DO_STOP_NOW"
   try:
      # Error output of touch is discarded. The with-block makes sure the
      # devnull handle is closed again.
      with open(os.devnull, 'w') as devnull:
         check_output(cmd_dostop, shell=True, stderr=devnull)
   except Exception:
      return "Action failed."

   return None
215
def DoRestartNow(jobid):
   """Create the file DO_RESTART_NOW in the temporary directory of a job.

   The file is created with `touch` at `cmd_tmpdir`/`jobid`/DO_RESTART_NOW.
   Presumably the running job checks for this file and initiates a restart
   when it appears; that part is not visible in this script.

   Args:
      jobid: Name of the job's subdirectory below `cmd_tmpdir`.

   Returns:
      None on success, "Action failed." if the file could not be created.
   """

   cmd_dorestart = "touch " + cmd_tmpdir + "/" + jobid + "/DO_RESTART_NOW"
   try:
      # Error output of touch is discarded. The with-block makes sure the
      # devnull handle is closed again.
      with open(os.devnull, 'w') as devnull:
         check_output(cmd_dorestart, shell=True, stderr=devnull)
   except Exception:
      return "Action failed."

   return None
227
def GetPARIN(jobid):
   """Return the content of the file PARIN of a job.

   The file is read with `cat` from `cmd_tmpdir`/`jobid`/PARIN.

   Args:
      jobid: Name of the job's subdirectory below `cmd_tmpdir`.

   Returns:
      The file content. If the file cannot be read, the text
      "Action failed." immediately followed by the attempted cat command.
   """

   cmd_parin = "cat " + cmd_tmpdir + "/" + jobid + "/PARIN"
   try:
      # Error output of cat is discarded. The with-block makes sure the
      # devnull handle is closed again.
      with open(os.devnull, 'w') as devnull:
         return check_output(cmd_parin, shell=True, stderr=devnull)
   except Exception:
      # NOTE(review): unlike GetRC, the failure text has the command appended
      # without a separator. This looks like a debugging aid, but it is kept
      # unchanged because callers may display or compare the message.
      return "Action failed." + cmd_parin
241
def GetRC(jobid):
   """Return the content of the file RUN_CONTROL of a job.

   The file is read with `cat` from `cmd_tmpdir`/`jobid`/RUN_CONTROL.

   Args:
      jobid: Name of the job's subdirectory below `cmd_tmpdir`.

   Returns:
      The file content, or "Action failed." if the file cannot be read.
   """

   cmd_rc = "cat " + cmd_tmpdir + "/" + jobid + "/RUN_CONTROL"
   try:
      # Error output of cat is discarded. The with-block makes sure the
      # devnull handle is closed again.
      with open(os.devnull, 'w') as devnull:
         return check_output(cmd_rc, shell=True, stderr=devnull)
   except Exception:
      return "Action failed."
255
# START OF MAIN
# "queue" writes its report itself. Every other known action returns a
# message (or None) that is printed here. Unknown actions are reported.
if action == "queue":
   ReadQueue(data)
else:
   _handlers = {
      "check":   CheckJob,
      "cancel":  CancelJob,
      "start":   GetStartTime,
      "stop":    DoStopNow,
      "restart": DoRestartNow,
      "parin":   GetPARIN,
      "rc":      GetRC,
   }
   _handler = _handlers.get(action)
   if _handler is None:
      print("Error. Action " + action + " unknown.")
   else:
      print(_handler(data))
275
Note: See TracBrowser for help on using the repository browser.