source: palm/trunk/TUTORIAL/SOURCE/restarts_with_mrun.tex @ 945

Last change on this file since 945 was 945, checked in by maronga, 12 years ago

added/updated several tutorial files

  • Property svn:executable set to *
  • Property svn:keywords set to Id
File size: 14.6 KB
RevLine 
[945]1% $Id: restarts_with_mrun.tex 945 2012-07-17 15:43:01Z maronga $
2\input{header_tmp.tex}
3%\input{../header_lectures.tex}
4
5\usepackage[utf8]{inputenc}
6\usepackage{ngerman}
7\usepackage{pgf}
8\usetheme{Dresden}
9\usepackage{subfigure}
10\usepackage{units}
11\usepackage{multimedia}
12\usepackage{hyperref}
13\newcommand{\event}[1]{\newcommand{\eventname}{#1}}
14\usepackage{xmpmulti}
15\usepackage{tikz}
16\usetikzlibrary{shapes,arrows,positioning}
17\usetikzlibrary{decorations.markings}
18\usetikzlibrary{decorations.pathreplacing}
19\def\Tiny{\fontsize{4pt}{4pt}\selectfont}
20\usepackage{amsmath}
21\usepackage{amssymb}
22\usepackage{multicol}
23\usepackage{pdfcomment}
24\usepackage{graphicx}
25\usepackage{listings}
26\lstset{showspaces=false,language=fortran,basicstyle=
27        \ttfamily,showstringspaces=false,captionpos=b}
28
29\institute{Institut für Meteorologie und Klimatologie, Leibniz Universität Hannover}
30\date{last update: \today}
31\event{PALM Seminar}
32\setbeamertemplate{navigation symbols}{}
33
34\setbeamertemplate{footline}
35  {
36    \begin{beamercolorbox}[rightskip=-0.1cm]&
37     {\includegraphics[height=0.65cm]{imuk_logo.pdf}\hfill \includegraphics[height=0.65cm]{luh_logo.pdf}}
38    \end{beamercolorbox}
39    \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,
40      leftskip=.3cm,rightskip=0.3cm plus1fil]{title in head/foot}
41      {\leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor} \hfill \eventname \hfill \insertframenumber \; / \inserttotalframenumber}
42    \end{beamercolorbox}
43    \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot}
44    \end{beamercolorbox}
45  }
46%\logo{\includegraphics[width=0.3\textwidth]{luhimuk_logo.pdf}}
47
48\title[Carrying out restart runs with mrun]{Carrying out restart runs with \texttt{mrun}}
49\author{Siegfried Raasch}
50
51\begin{document}
52
53% Folie 1
54\begin{frame}
55   \titlepage
56\end{frame}
57
58\section{Carrying out restart runs with mrun}
59\subsection{Carrying out restart runs with mrun}
60
61         
62
63% Folie 2
64\begin{frame}
65   \frametitle{Definition of ``restart run''}
66 
67   \begin{itemize}
68      \item<1-> A \textbf{``restart run''} is a model run that starts from an initial condition given by the simulated flow at the end of a previous (restart or initial) run.
69      \item<2-> In order to carry out a restart run, a file has to be written at the end of the previous run, which contains the values of all flow variables at the necessary time steps (Runge-Kutta: $t$, leap-frog: $t$, $t-\Delta t$). This file has to be read at the beginning of the restart run.
70      \item<3-> An initial run and its subsequent restart runs form a so-called \textbf{job chain}.
71   \end{itemize} 
72
73\end{frame}
74
75
76% Folie 3
77\begin{frame}
78   \frametitle{Reasons for Restart Runs}
79 
80   \begin{itemize}
81      \item<1-> The maximum job time is generally limited by the queuing system:
82      \begin{itemize}
83         \item<1-> simulations must be split into several parts
84      \end{itemize} 
85      \item<2-> The user wants to carry out several runs on the basis of the same initial temporal development:
86      \begin{itemize}
87         \item<1-> the initial phase needs to be simulated only once,
88all runs start from the end point of this initial phase by reading the flow field data written at the end of the initial run
89      \end{itemize}
90   \end{itemize} 
91
92\end{frame}
93
94
95% Folie 4 (Zu voll)?
96\begin{frame}
97   \frametitle{Carrying Out Restart Runs With \texttt{mrun}}
98   \scriptsize
99
100   Concerning \texttt{mrun}, the first thing required to enable restart runs is to use the additional activating string \grqq \texttt{restart}\grqq\, in the \texttt{mrun}-call for the \underline{initial run}:\\
101   \quad \texttt{mrun -d test ... -r ''d3\# restart''}\\
102   This will have the following effects:
103   \vspace{2mm}
104   \begin{itemize}
105      \item<2-> At the end of the run, all necessary variables will be written as binary data to the local file \texttt{BINOUT}. This is caused by an entry in the configuration file\\
106      \quad \texttt{\%write\underline{ }binary true restart}\\
107      which sets the environment variable \texttt{write\underline{ }binary}, which is in turn read by PALM from the local file \texttt{ENVPAR} created by \texttt{mrun}.
108      \item<3-> This binary file will be permanently stored in case that an appropriate file connection statement exists\\
109      \begin{columns}
110         \column{1.05\textwidth}
111         \texttt{BINOUT  out:loc:flpe restart \~{}/palm/current\underline{ }version/JOBS/\$fname/RESTART  \underline{ }d3d}
112      \end{columns}
113      \item<4-> If, during the run, PALM detects that the simulation cannot be finished due to limited job time, it tells \texttt{mrun} (by creating a local file named \texttt{CONTINUE\underline{ }RUN}) that a restart job has to be started. \texttt{mrun} will then automatically start such a job by submitting the command\\
114      \quad \texttt{mrun -d test ... -r ''d3f restart''}\\
115      on the \textbf{local host}. Options of this command are nearly the same as of the initial run, but every sharp symbol (\grqq\#\grqq) in the activating strings is replaced by an \grqq f\grqq.
116   \end{itemize}
117   \onslide<5->\textcolor{red}{\textbf{This affects the activation of file connections for the restart job!}}
118\end{frame}
119
120
121% Folie 5
122\begin{frame}
123   \frametitle{Input Files Necessary For Restart Jobs}
124   \scriptsize
125   \vspace{3mm}
126   File connection statements for input files from the default \texttt{.mrun.config} file:\\
127   \quad \texttt{PARIN \hspace{0.5em} in:job \hspace{3em} d3\# \hspace{0.5em} \$base\underline{ }data/\$fname/INPUT \hspace{1.5em} \underline{ }p3d}\\
128   \quad \texttt{PARIN \hspace{0.5em} in:job \hspace{3em} d3f \hspace{0.5em} \$base\underline{ }data/\$fname/INPUT \hspace{1.5em} \underline{ }p3df}\\
129   \quad \texttt{BININ \hspace{0.5em} in:loc:flpe \hspace{0.5em} d3f \hspace{0.5em} \$base\underline{ }data/\$fname/RESTART \hspace{0.5em} \underline{ }d3d}\\
130   \vspace{4mm}
131   \begin{itemize}
132      \item<2-> For the restart job, the model receives a different parameter file than for the initial job (e.g. \texttt{example\underline{ }cbl\underline{ }p3d\textcolor{blue}{f}} instead of \texttt{example\underline{ }cbl\underline{ }p3d}).\\
133   \vspace{2mm}
134   The parameter file for the restart job is nearly the same as for the initial run, but it must contain the parameter setting\\
135   \quad \texttt{initializing\underline{ }actions = 'read\underline{ }restart\underline{ }data'}\\
136   in the \texttt{\&inipar}-NAMELIST-group. All other \texttt{\&inipar}-parameter-settings are ignored!\\
137   \vspace{2mm}
138   \texttt{\&d3par}-parameter values can freely be changed compared with the parameter file for the initial run.\\
139   \vspace{4mm}
140   \item<3-> Input binary data file (\texttt{BININ}) is necessary (and available) only for restart jobs
141   \end{itemize}
142\end{frame}
143
144
145% Folie 6
146\begin{frame}
147   \frametitle{Output File Handling in Restart Jobs }
148   \scriptsize
149   \vspace{2mm} 
150   Example for output file connection statements from the default \texttt{.mrun.config} file:\\
151   \vspace{2mm}
152   \quad \texttt{RUN\underline{ }CONTROL \hspace{0.5em} out:loc:tr \hspace{1em} d3\# \hspace{0.5em} \$base\underline{ }data/\$fname/MONITORING \hspace{0.5em} \underline{ }rc}\\
153   \quad \texttt{RUN\underline{ }CONTROL \hspace{0.5em} out:loc:tra \hspace{0.5em} d3f \hspace{0.5em} \$base\underline{ }data/\$fname/MONITORING \hspace{0.5em} \underline{ }rc}\\
154   \vspace{2mm}
155   In case of restart jobs, the contents of many local output files are appended to the respective permanent files from the initial or previous run by using the \texttt{tra} file attribute.\\
156   \vspace{6mm}
157   \onslide<2-> File connection statement example for appending netCDF files when PALM is running on a remote host:\\
158   \quad \texttt{DATA\underline{ }1D\underline{ }PR\underline{ }NETCDF\hspace{1em}in:loc\hspace{2.5em}prf\hspace{3em}\$base\underline{ }data/\$fname/OUTPUT\hspace{0.5em}\underline{ }pr\hspace{0.5em}nc}\\
159   \quad \texttt{DATA\underline{ }1D\underline{ }PR\underline{ }NETCDF\hspace{1em}out:loc\hspace{2em}pr\#:prf\hspace{1em}\$base\underline{ }data/\$fname/OUTPUT\hspace{0.5em}\underline{ }pr\hspace{0.5em}nc}\\
160   \quad \texttt{DATA\underline{ }1D\underline{ }PR\underline{ }NETCDF\hspace{1em}out:loc:tr\hspace{0.5em}pr\#:prf\hspace{1em}\$base\underline{ }data/\$fname/OUTPUT\hspace{0.5em}\underline{ }pr\hspace{0.5em}nc}\\
161   \vspace{2mm}
162   The netCDF file from the respective previous run has to be provided as an INPUT file.\\
163   \vspace{2mm}
164   Therefore, if running PALM on a remote host, a copy of this data file must be additionally stored on the remote host (second statement). On the local host, each run creates a new file (cycle) which contains the complete data from the current run and all previous runs.
165   
166\end{frame}
167
168
169% Folie 7 (Zu voll?)
170\begin{frame}
171   \frametitle{Handling of Large Binary Data Files}
172   \scriptsize
173   \begin{columns}
174      \column{1.1\textwidth}
175      \begin{itemize}
176         \item<1-> Typically, the binary restart files are very large, so that they cannot be stored in the user's home directory because of limited file quotas. Also, hard disks where \texttt{/home} is stored are typically very slow, so that the copy process takes a very long time.
177         \item<2-> Using the file attribute \texttt{fl} (abbreviation for German \grqq Fortsetzungslauf\grqq) in the output file connection statement causes \texttt{mrun} to copy the local file to a special directory, which can be defined in the configuration file by the environment variable \texttt{tmp\underline{ }data\underline{ }catalog}. The permanent file described in the connection statement is also created, but it is \textbf{empty}.
178         \item<3-> At the end of the job, the second-to-last cycle of the respective file with attribute \texttt{fl} is automatically deleted by \texttt{mrun} from the \texttt{tmp\underline{ }data\underline{ }catalog} in order to save disk space. This can be prevented by setting the \texttt{mrun}-option \grqq\texttt{-k}\grqq\ (keep data from previous run).
179      \end{itemize}
180   \end{columns}
181   \ \\
182      \onslide<4-> \textbf{Example:}\\
183      \tiny \quad \texttt{\%base\underline{ }data\hspace{4.5em}\~{}/palm/current\underline{ }version/JOBS}\\
184      \tiny \quad \texttt{\%tmp\underline{ }data\underline{ }catalog\hspace{1.0em}/gfs2/work/niksiraa/palm\underline{ }restart\underline{ }data}\\
185      \vspace{1mm}
186      \tiny \quad \texttt{BINOUT\hspace{1.0em}out:loc:flpe\hspace{1.0em}restart\hspace{1.0em}\$base\underline{ }data/\$fname/RESTART\hspace{1.0em}\underline{ }d3d}\\
187      \ \\
188      \onslide<5-> \scriptsize \textbf{Files (directories) created when using \texttt{-d example\underline{ }cbl}:}  \\
189      \tiny \quad \texttt{/gfs2/work/niksiraa/palm\underline{ }restart\underline{ }data/example\underline{ }cbl\underline{ }d3d}\\
190      \tiny \quad \texttt{\~{}/palm/current\underline{ }version/JOBS/example\underline{ }cbl/RESTART/example\underline{ }cbl\underline{ }d3d \# empty file (directory)}\\
191   \vspace{1mm}
192   \onslide<6-> \scriptsize \textcolor{red}{Concerning input files, \texttt{mrun} always determines the current cycle number to be \underline{used from the contents of the directory defined by the file connection statement!}}
193
194\end{frame}
195
196
197% Folie 8
198\begin{frame}
199   \frametitle{Checking the Restart Job Execution}
200   \scriptsize
201   \begin{itemize}
202      \item essentially by looking at the messages in the job protocol file:
203   \end{itemize} 
204   
205   \centering
206   \includegraphics[width=0.93\textwidth]{restarts_with_mrun_figures/checking.png}
207   \begin{tikzpicture}[remember picture, overlay]
208      \node[rectangle, draw,text width=0.29\textwidth, fill=white] at (-18mm,62mm) {\noindent \scriptsize In this example, restart time has been set
209     
210      manually by the user.};
211   \end{tikzpicture}
212
213\end{frame}
214
215
216% Folie 9
217\begin{frame}
218   \frametitle{Setting the Restart Time Manually}
219   \scriptsize
220   \begin{columns}
221      \column{1.07\textwidth}
222      \begin{itemize}
223         \item<1-> By default, PALM checks after every timestep whether enough time remains within the job's CPU time limit to carry out the next timestep:\\
224         \quad \grqq\texttt{total job time}\grqq\, $-$ \grqq\texttt{time already consumed}\grqq\, $<$ \texttt{termination\underline{ }time\underline{ }needed}\\
225(as given by \texttt{mrun}-option \texttt{-t} ...) \hspace{5mm}  (as given by parameter in \texttt{\&d3par}-NAMELIST)
226         \item<2-> \texttt{termination\underline{ }time\underline{ }needed} has to include the cpu time needed before running PALM (e.g. for compilation, copying of input data, etc.; default value: 300 s)!\\
227         \ \\
228         \onslide<3-> \textbf{Warning:}\\
229         \quad \quad \grqq\texttt{total job time}\grqq\, $<$ \texttt{termination\underline{ }time\underline{ }needed},\\
230         \quad forces a restart after the first timestep!
231         \item<4-> \texttt{\&d3par}-parameters \texttt{restart\underline{ }time} and \texttt{dt\underline{ }restart} can be used to set restart time(s) manually.
232         \item<5-> In case of manually setting the restart time, the default checking (see above) is still active and a restart will be automatically forced if the job reaches its cpu limit, even if the manually set restart time has not been reached!
233      \end{itemize} 
234   \end{columns}
235\end{frame}
236
237
238% Folie 10
239\begin{frame}
240   \frametitle{Starting Restart Jobs Manually}
241   \scriptsize
242      \begin{itemize}
243         \item<1-> After a job has finished (\texttt{end\underline{ }time} has been reached), the user can submit a restart job manually (provided that restart data have been saved) by entering:\\
244         \vspace{2mm}
245         \quad \texttt{mrun ... -r ''d3f ...'' ...}\\
246         or\\
247              \quad \texttt{mrun ... -r ''d3f restart ...'' ...}\\
248              \ \\
249         \item<2-> Remember to increase the value of \texttt{end\underline{ }time} in the parameter file before submitting the job.
250         \item<3-> If a manually started restart job shall continue a run of a former job chain which is somewhere in the middle of this chain, all binary files with respective higher cycle numbers have to be deleted or removed from their respective directories.
251   \end{itemize} 
252\end{frame}
253
254
255% Folie 11
256\begin{frame}
257   \frametitle{Exercise}
258   
259   \begin{itemize}
260      \item<2-> Set the job time (\texttt{-t} ...) of a run (e.g. the example-run) in a way that at least one restart job is necessary to reach the end time of the simulation.
261      \item<3-> Alternatively, create a job chain containing at least one restart run by setting the \texttt{\&d3par}-parameters \texttt{restart\underline{ }time} and \texttt{dt\underline{ }restart} manually.
262      \item<4-> Check whether the results from the job chain are the same as if the simulation is carried out in one single job. (They should be!)
263      \item<5-> Carry out this job and watch what happens.
264      \item<6-> Delete the results of the restart job(s) and carry out the restart from the initial run manually.
265   \end{itemize} 
266   
267\end{frame}
268
269
270\end{document}
Note: See TracBrowser for help on using the repository browser.