source: palm/trunk/TUTORIAL/SOURCE/restarts_with_mrun.tex @ 1468

Last change on this file since 1468 was 973, checked in by maronga, 12 years ago

tutorial updates

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 14.4 KB
RevLine 
[945]1% $Id: restarts_with_mrun.tex 973 2012-08-07 16:03:47Z maronga $
2\input{header_tmp.tex}
3%\input{../header_lectures.tex}
4
5\usepackage[utf8]{inputenc}
6\usepackage{ngerman}
7\usepackage{pgf}
8\usetheme{Dresden}
9\usepackage{subfigure}
10\usepackage{units}
11\usepackage{multimedia}
12\usepackage{hyperref}
13\newcommand{\event}[1]{\newcommand{\eventname}{#1}}
14\usepackage{xmpmulti}
15\usepackage{tikz}
16\usetikzlibrary{shapes,arrows,positioning}
17\usetikzlibrary{decorations.markings}
18\usetikzlibrary{decorations.pathreplacing}
19\def\Tiny{\fontsize{4pt}{4pt}\selectfont}
20\usepackage{amsmath}
21\usepackage{amssymb}
22\usepackage{multicol}
23\usepackage{pdfcomment}
24\usepackage{graphicx}
25\usepackage{listings}
26\lstset{showspaces=false,language=fortran,basicstyle=
27        \ttfamily,showstringspaces=false,captionpos=b}
28
29\institute{Institut für Meteorologie und Klimatologie, Leibniz Universität Hannover}
30\date{last update: \today}
31\event{PALM Seminar}
32\setbeamertemplate{navigation symbols}{}
33
34\setbeamertemplate{footline}
35  {
36    \begin{beamercolorbox}[rightskip=-0.1cm]&
37     {\includegraphics[height=0.65cm]{imuk_logo.pdf}\hfill \includegraphics[height=0.65cm]{luh_logo.pdf}}
38    \end{beamercolorbox}
39    \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,
40      leftskip=.3cm,rightskip=0.3cm plus1fil]{title in head/foot}
41      {\leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor} \hfill \eventname \hfill \insertframenumber \; / \inserttotalframenumber}
42    \end{beamercolorbox}
43    \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot}
44    \end{beamercolorbox}
45  }
46%\logo{\includegraphics[width=0.3\textwidth]{luhimuk_logo.pdf}}
47
48\title[Carrying out restart runs with mrun]{Carrying out restart runs with \texttt{mrun}}
49\author{Siegfried Raasch}
50
51\begin{document}
52
53% Folie 1
54\begin{frame}
55   \titlepage
56\end{frame}
57
58\section{Carrying out restart runs with mrun}
59\subsection{Carrying out restart runs with mrun}
60
61         
62
63% Folie 2
64\begin{frame}
65   \frametitle{Definition of “restart run”}
66 
67   \begin{itemize}
68      \item<1-> A \textbf{“restart run”} is a model run that starts with an initial condition given by the simulated flow at the end of a previous (restart or initial) run.
69      \item<2-> In order to carry out a restart run, a file has to be written at the end of the previous run, which contains the values of all flow variables at the necessary time steps (Runge-Kutta: $t$, leap-frog: $t$, $t-\Delta t$). This file has to be read at the beginning of the restart run.
70      \item<3-> Initial and respective restart runs form a so-called \textbf{job chain}.
71   \end{itemize} 
72
73\end{frame}
74
75
76% Folie 3
77\begin{frame}
78   \frametitle{Reasons for Restart Runs}
79 
80   \begin{itemize}
81      \item<1-> The maximum job time is generally limited by the queuing system:
82      \begin{itemize}
83         \item<1-> simulations must be split into several parts
84      \end{itemize} 
85      \item<2-> The user wants to carry out several runs on the basis of the same initial temporal development:
86      \begin{itemize}
87         \item<1-> the initial phase needs to be simulated only once,
88all runs start from the end point of this initial phase by reading the flow field data written at the end of the initial run
89      \end{itemize}
90   \end{itemize} 
91
92\end{frame}
93
94
[973]95% Folie 4
[945]96\begin{frame}
97   \frametitle{Carrying Out Restart Runs With \texttt{mrun}}
98   \scriptsize
[973]99   \begin{columns}[T]
100      \begin{column}{1.0\textwidth}
101         Concerning \texttt{mrun}, the first thing required to enable restart runs is to use the additional activating string \grqq \texttt{restart}\grqq\, in the \texttt{mrun}-call for the \underline{initial run}:\\
102         \vspace{1mm}
103         \quad \texttt{mrun -d test ... -r \dq d3\# restart\dq}\\
104         \ \\
105         This will have the following effects:
106         \vspace{1mm}
107         \tiny
108         \begin{itemize}
109            \item<2-> At the end of the run, all necessary variables will bei written as binary data to the local file \texttt{BINOUT}. This is caused by an entry in the configuration file\\
110            \vspace{1mm}
111            \quad \texttt{\%write\underline{ }binary true restart}\\
112            \vspace{1mm}
113            which sets the environment variable \texttt{write\underline{ }binary}, which is in turn read by PALM from the local file \texttt{ENVPAR} created by \texttt{mrun}.
114            \vspace{3mm}
115            \item<3-> This binary file will be permanently stored in case that an appropriate file connection statement exists\\
116            \vspace{1mm}
117            \quad \texttt{BINOUT  out:loc:flpe restart \~{}/palm/current\underline{ }version/JOBS/\$fname/RESTART  \underline{ }d3d}
118            \vspace{3mm}
119            \item<4-> If, during the run, PALM detects that the simulation cannot be finished due to limited job time, it tells \texttt{mrun} (by creating a local file named \texttt{CONTINUE\underline{ }RUN}) that a restart job has to be started. \texttt{mrun} will then automatically start such a job by submitting the command\\
120            \vspace{1mm}
121            \quad \texttt{mrun -d test ... -r \dq d3f restart\dq}\\
122            \vspace{1mm}
123            on the \textbf{local host}. Options of this command are nearly the same as those of the initial run, but every sharp symbol (\grqq\#\grqq) in the activating strings is replaced by an \grqq f\grqq.
124         \end{itemize}
125         \scriptsize
126         \vspace{2mm}
127         \onslide<5->\textcolor{red}{\textbf{This affects the activation of file connections for the restart job!}}
128      \end{column}
129   \end{columns}
[945]130
131\end{frame}
132
133
134% Folie 5
135\begin{frame}
136   \frametitle{Input Files Necessary For Restart Jobs}
137   \scriptsize
138   \vspace{3mm}
139   File connection statements for input files from the default \texttt{.mrun.config} file:\\
140   \quad \texttt{PARIN \hspace{0.5em} in:job \hspace{3em} d3\# \hspace{0.5em} \$base\underline{ }data/\$fname/INPUT \hspace{1.5em} \underline{ }p3d}\\
141   \quad \texttt{PARIN \hspace{0.5em} in:job \hspace{3em} d3f \hspace{0.5em} \$base\underline{ }data/\$fname/INPUT \hspace{1.5em} \underline{ }p3df}\\
142   \quad \texttt{BININ \hspace{0.5em} in:loc:flpe \hspace{0.5em} d3f \hspace{0.5em} \$base\underline{ }data/\$fname/RESTART \hspace{0.5em} \underline{ }d3d}\\
143   \vspace{4mm}
144   \begin{itemize}
145      \item<2-> For the restart job, the model receives a different parameter file than for the initial job (e.g. \texttt{example\underline{ }cbl\underline{ }p3d\textcolor{blue}{f}} instead of \texttt{example\underline{ }cbl\underline{ }p3d}).\\
[973]146   \vspace{4mm}
[945]147   The parameter file for the restart job is nearly the same as for the initial run, but it must contain the parameter setting\\
[973]148   \vspace{1mm}
[945]149   \quad \texttt{initializing\underline{ }actions = 'read\underline{ }restart\underline{ }data'}\\
[973]150   \vspace{1mm}
[945]151   in the \texttt{\&inipar}-NAMELIST-group. All other \texttt{\&inipar}-parameter-settings are ignored!\\
[973]152   \vspace{4mm}
[945]153   \texttt{\&d3par}-parameter values can freely be changed compared with the parameter file for the initial run.\\
154   \vspace{4mm}
[973]155   \item<3-> Input binary data file (\texttt{BININ}) is necessary (and available) only for\\ restart jobs
[945]156   \end{itemize}
157\end{frame}
158
159
160% Folie 6
161\begin{frame}
162   \frametitle{Output File Handling in Restart Jobs }
163   \scriptsize
164   \vspace{2mm} 
165   Example for output file connection statements from the default \texttt{.mrun.config} file:\\
166   \vspace{2mm}
167   \quad \texttt{RUN\underline{ }CONTROL \hspace{0.5em} out:loc:tr \hspace{1em} d3\# \hspace{0.5em} \$base\underline{ }data/\$fname/MONITORING \hspace{0.5em} \underline{ }rc}\\
168   \quad \texttt{RUN\underline{ }CONTROL \hspace{0.5em} out:loc:tra \hspace{0.5em} d3f \hspace{0.5em} \$base\underline{ }data/\$fname/MONITORING \hspace{0.5em} \underline{ }rc}\\
169   \vspace{2mm}
170   In case of restart jobs, the contents of many local output files are appended to the respective permanent files from the initial or previous run by using the \texttt{tra} file attribute.\\
171   \vspace{6mm}
172   \onslide<2-> File connection statement example for appending netCDF files when PALM is running on a remote host:\\
173   \quad \texttt{DATA\underline{ }1D\underline{ }PR\underline{ }NETCDF\hspace{1em}in:loc\hspace{2.5em}prf\hspace{3em}\$base\underline{ }data/\$fname/OUTPUT\hspace{0.5em}\underline{ }pr\hspace{0.5em}nc}\\
174   \quad \texttt{DATA\underline{ }1D\underline{ }PR\underline{ }NETCDF\hspace{1em}out:loc\hspace{2em}pr\#:prf\hspace{1em}\$base\underline{ }data/\$fname/OUTPUT\hspace{0.5em}\underline{ }pr\hspace{0.5em}nc}\\
175   \quad \texttt{DATA\underline{ }1D\underline{ }PR\underline{ }NETCDF\hspace{1em}out:loc:tr\hspace{0.5em}pr\#:prf\hspace{1em}\$base\underline{ }data/\$fname/OUTPUT\hspace{0.5em}\underline{ }pr\hspace{0.5em}nc}\\
176   \vspace{2mm}
177   The netCDF file from the respective previous run has to be provided as an INPUT file.\\
178   \vspace{2mm}
179   Therefore, if running PALM on a remote host, a copy of this data file must be additionally stored on the remote host (second statement). On the local host, each run creates a new file (cycle) which contains the complete data from the current run and all previous runs.
180   
181\end{frame}
182
183
[973]184% Folie 7
[945]185\begin{frame}
186   \frametitle{Handling of Large Binary Data Files}
187   \scriptsize
188   \begin{columns}
189      \column{1.1\textwidth}
[973]190      \vspace{-1mm}
[945]191      \begin{itemize}
192         \item<1-> Typically, the binary restart files are very large, so that they cannot be stored in the user's home directory because of limited file quotas. Also, hard disks where \texttt{/home} is stored are typically very slow, so that the copy process takes a very long time.
[973]193         \vspace{1mm}
[945]194         \item<2-> Using the file attribute \texttt{fl} (abbreviation for German \grqq Fortsetzungslauf\grqq) in the output file connection statement causes \texttt{mrun} to copy the local file to a special directory, which can be defined in the configuration file by the environment variable \texttt{tmp\underline{ }data\underline{ }catalog}. The permanent file described in the connection statement is also created, but it is \textbf{empty}.
[973]195         \vspace{1mm}
[945]196         \item<3-> At the end of the job, the second-to-last cycle of the respective file with attribute \texttt{fl} is automatically deleted by \texttt{mrun} from the \texttt{tmp\underline{ }data\underline{ }catalog} in order to save disk space. This can be prevented by setting the \texttt{mrun}-option \grqq\texttt{-k}\grqq\ (keep data from previous run).
197      \end{itemize}
198   \end{columns}
[973]199   \vspace{2mm}
[945]200      \onslide<4-> \textbf{Example:}\\
201      \tiny \quad \texttt{\%base\underline{ }data\hspace{4.5em}\~{}/palm/current\underline{ }version/JOBS}\\
202      \tiny \quad \texttt{\%tmp\underline{ }data\underline{ }catalog\hspace{1.0em}/gfs2/work/niksiraa/palm\underline{ }restart\underline{ }data}\\
203      \vspace{1mm}
204      \tiny \quad \texttt{BINOUT\hspace{1.0em}out:loc:flpe\hspace{1.0em}restart\hspace{1.0em}\$base\underline{ }data/\$fname/RESTART\hspace{1.0em}\underline{ }d3d}\\
205      \ \\
206      \onslide<5-> \scriptsize \textbf{Files (directories) created when using \texttt{-d example\underline{ }cbl}:}  \\
207      \tiny \quad \texttt{/gfs2/work/niksiraa/palm\underline{ }restart\underline{ }data/example\underline{ }cbl\underline{ }d3d}\\
208      \tiny \quad \texttt{\~{}/palm/current\underline{ }version/JOBS/example\underline{ }cbl/RESTART/example\underline{ }cbl\underline{ }d3d \# empty file (directory)}\\
[973]209   \vspace{2mm}
210   \onslide<6-> \scriptsize \textcolor{red}{Concerning input files, \texttt{mrun} always determines the current cycle number to be \underline{used from the contents of the directory defined by the file connection}\\ \underline{statement!}}
[945]211
212\end{frame}
213
214
215% Folie 8
216\begin{frame}
217   \frametitle{Checking the Restart Job Execution}
218   \scriptsize
219   \begin{itemize}
220      \item essentially by looking at the messages in the job protocol file:
221   \end{itemize} 
222   
223   \centering
224   \includegraphics[width=0.93\textwidth]{restarts_with_mrun_figures/checking.png}
225   \begin{tikzpicture}[remember picture, overlay]
226      \node[rectangle, draw,text width=0.29\textwidth, fill=white] at (-18mm,62mm) {\noindent \scriptsize In this example, restart time has been set
227     
228      manually by the user.};
229   \end{tikzpicture}
230
231\end{frame}
232
233
234% Folie 9
235\begin{frame}
236   \frametitle{Setting the Restart Time Manually}
237   \scriptsize
238   \begin{columns}
239      \column{1.07\textwidth}
240      \begin{itemize}
241         \item<1-> By default, PALM checks after every timestep whether enough time remains from the job cpu limit to carry out the next timestep:\\
[973]242         \vspace{1mm}
243         (\quad \grqq\texttt{total job time}\grqq\, - \grqq\texttt{time already consumed}\grqq\,) \texttt{<=} \texttt{termination\underline{ }time\underline{ }needed}\\
244         (as given by \texttt{mrun}-option \texttt{-t} ...) \hspace{5mm}  (as given by parameter in \texttt{\&d3par}-NAMELIST)\\
245         \vspace{3mm}
[945]246         \item<2-> \texttt{termination\underline{ }time\underline{ }needed} has to include the cpu time needed before running PALM (e.g. for compilation, copying of input data, etc.; default value: 300 s)!\\
247         \ \\
248         \onslide<3-> \textbf{Warning:}\\
[973]249         \vspace{1mm}
250         \quad \quad \grqq\texttt{total job time}\grqq\, \texttt{<=} \texttt{termination\underline{ }time\underline{ }needed},\\
[945]251         \quad forces a restart after the first timestep!
[973]252         \vspace{3mm}
253         \item<4-> \texttt{\&d3par}-parameters \texttt{restart\underline{ }time} and \texttt{dt\underline{ }restart} can be used to set restart time(s) manually.\\
254         \vspace{3mm}
255         \item<5-> In case of manually setting the restart time, the default checking (see above) is still active and a restart will be automatically forced if the job reaches its cpu limit, even if the manually set restart time has not been reached!\\
[945]256      \end{itemize} 
257   \end{columns}
258\end{frame}
259
260
261% Folie 10
262\begin{frame}
263   \frametitle{Starting Restart Jobs Manually}
264   \scriptsize
265      \begin{itemize}
266         \item<1-> After a job has finished (\texttt{end\underline{ }time} has been reached), the user can submit a restart job manually (provided that restart data have been saved) by entering:\\
267         \vspace{2mm}
[973]268         \quad \texttt{mrun ... -r \dq d3f ...\dq\, ...}\\
[945]269         or\\
[973]270              \quad \texttt{mrun ... -r \dq d3f restart ...\dq\, ...}\\
[945]271              \ \\
272         \item<2-> Remember to increase the value of \texttt{end\underline{ }time} in the parameter file before submitting the job.
[973]273         \vspace{2mm}
[945]274         \item<3-> If a manually started restart job shall continue a run of a former job chain which is somewhere in the middle of this chain, all binary files with respective higher cycle numbers have to be deleted or removed from their respective directories.
275   \end{itemize} 
276\end{frame}
277
[973]278\end{document}
Note: See TracBrowser for help on using the repository browser.