source: palm/trunk/TUTORIAL/SOURCE/debugging.tex @ 1817

Last change on this file since 1817 was 1515, checked in by boeske, 10 years ago

several updates in the tutorial

  • Property svn:keywords set to Id
File size: 14.6 KB
Line 
1% $Id: debugging.tex 1515 2015-01-02 11:35:51Z maronga $
2\input{header_tmp.tex}
3%\input{../header_LECTURE.tex}
4
5\usepackage[utf8]{inputenc}
6\usepackage{ngerman}
7\usepackage{pgf}
8\usepackage{subfigure}
9\usepackage{units}
10\usepackage{multimedia}
11\usepackage{hyperref}
12\newcommand{\event}[1]{\newcommand{\eventname}{#1}}
13\usepackage{xmpmulti}
14\usepackage{tikz}
15\usetikzlibrary{shapes,arrows,positioning}
16\def\Tiny{\fontsize{4pt}{4pt}\selectfont}
17\usepackage{amsmath}
18\usepackage{amssymb}
19\usepackage{multicol}
20\usepackage{pdfcomment}
21\usepackage{graphicx}
22\usepackage{listings}
23\lstset{showspaces=false,language=fortran,basicstyle=
24        \ttfamily,showstringspaces=false, captionpos=b,aboveskip=0pt,belowskip=0pt}
25
26
27\institute{Institute of Meteorology and Climatology, Leibniz Universität Hannover}
28\selectlanguage{english}
29\date{last update: \today}
30\event{PALM Seminar}
31\setbeamertemplate{navigation symbols}{}
32\setbeamersize{text margin left=.5cm,text margin right=.2cm}
33\setbeamertemplate{footline}
34  {
35    \begin{beamercolorbox}[rightskip=-0.1cm]&
36     {\includegraphics[height=0.65cm]{imuk_logo.pdf}\hfill \includegraphics[height=0.65cm]{luh_logo.pdf}}
37    \end{beamercolorbox}
38    \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,
39      leftskip=.3cm,rightskip=0.3cm plus1fil]{title in head/foot}
40      {\leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor} \hfill \eventname \hfill \insertframenumber \; / \inserttotalframenumber}
41    \end{beamercolorbox}
42    \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot}
43    \end{beamercolorbox}
44  }
45
46\title[PALM - Debugging]{PALM - Debugging}
47\author{PALM group}
48
49% Notes:
50% each subsection gets a dot in the menu (aligned vertically).
51% each frame in a subsection gets a dot (aligned horizontally)
52\begin{document}
53% Folie 1
54\begin{frame}
55\titlepage
56\end{frame}
57
58\section{Principal Sources and First Steps}
59\subsection{Principal Sources and First Steps}
60
61% Folie 2
62\begin{frame}
63   \frametitle{Principal Sources of Errors}
64   \small
65   \textbf{PALM runs can give rise to a large variety of errors ...}
66   \par\medskip
67   Some of the main possible reasons for errors are:
68   \begin{itemize}
69      \item<2->{Missing or wrong options in the {\tt mrun} call}
70      \item<3->{Errors in the configuration file}
71      \item<4->{Errors in the NAMELIST parameter file}
72      \item<5->{Errors in the {\tt ssh}-installation (authentication), if a remote host is used for batch jobs}
73      \item<6->{FORTRAN errors in the user code (user-interface files)} 
74      \item<7->{PALM runtime errors due to}
75      \begin{itemize}
76         \item<7->{wrong parameter settings}
77         \item<8->{errors in the user code}
78         \item<9->{errors in PALM's default code, which have not been detected so far (e.g. because some parameter combinations have never been tried)}
79      \end{itemize}
80   \end{itemize}
81\end{frame}
82
83% Folie 3
84\begin{frame}
85   \frametitle{First Steps of Debugging}
86   \footnotesize
87   \begin{itemize}
88      \item<2->{\textbf{Find out the principal reason of the error(s):}}
89      \begin{itemize}
90         \scriptsize
91         \item<3->{ Carefully analyze the job protocol file (or messages on the terminal, in case of interactive runs)
92               for any error messages or unexpected behaviour.}
93               \vspace{1.5mm}
94         \item<4->{In case of batch runs on a remote host, if the job protocol file is missing on the local host,
95               check whether you can find it in {\tt $\sim$/job\_queue} on the remote host.}
96               \vspace{1.5mm}
97         \item<5->{If the job has run into a time limit, no job protocol files or messages might be created at all (system dependent).}
98               \vspace{1.5mm}
99         \item<6->{Some typical errors which may occur during execution of mrun are automatically detected and displayed
100               by mrun in the job protocol or on the terminal: \\
101               \par\medskip
102               Respective error messages will begin with "'+++"'.} 
103               \vspace{1.5mm}
104         \item<7->{Compile and runtime error messages will only appear in the job protocol or on the terminal (in case of interactive runs).}
105               \vspace{1.5mm}
106         \item<8->{In case of runtime errors terminal messages may give first helpful hints about where errors are located.}       
107      \end{itemize}
108   \end{itemize}
109\end{frame}
110
111\section{Runtime Errors}
112\subsection{Runtime Errors}
113
114% Folie 4
115\begin{frame}
116   \frametitle{Debugging Runtime Errors (I)}
117   \small
118   \begin{itemize}
119      \item{In case of runtime errors, the available information depends on the compiler and on the compiler settings.}
120      \item<2->{The default options for the Intel-compiler ({\tt-O3} for fast execution) give
121            almost no information, e.g. about the subroutine or the line number of the code where the error occurred.
122            Execution is even continued in case of floating point errors!}
123      \item<3->{Floating point error detection and traceback can be activated with compiler options
124            \par\medskip 
125            {\tt ifort -fpe0 -debug -traceback -O0 ...}}
126      \item<4->{The default {\tt .mrun.config.default} file contains an additional block with debug options. It can be used with the mrun call
127            \par\medskip
128            {\tt mrun ... -h \textless host\_identifier\textgreater \;-K \dq parallel trace\dq ...}}
129   \end{itemize}
130\end{frame}
131
132% Folie 5
133\begin{frame}[fragile]
134   \frametitle{Debugging Runtime Errors (II)}
135   \small
136   \tikzstyle{yellow} = [rectangle, draw, fill=yellow!30, text width=0.9\textwidth, font=\Tiny]
137   \tikzstyle{box} = [rectangle, draw, text width=0.95\textwidth, font=\scriptsize]
138   \begin{itemize}
139      \item{The configuration file {\tt .mrun.config.default} looks like this:} \\
140      \ \\
141      \onslide<2->
142      \begin{tikzpicture}
143         \node [yellow]{\begin{lstlisting}   
144.
145# The next line is just an example. Add your own line below or replace this line.
146%host_identifier   myhostname  lcmy
147#
148# The next block contains all informations for compiling the PALM code
149# This is the block for the optimized version
150%fopts             -I:<replace by mpi include path>:-fpe0:-O3:-xHost:...    <hi> parallel
151%lopts             -L:<replace by mpi library path>:-fpe0:-O3:-xHost:...    <hi> parallel
152.
153.
154# This is the block for the debug version
155%fopts             -C:-check:nooutput_conversion:-fpe0:-debug:...           <hi> parallel trace
156%lopts             -C:-check:nooutput_conversion:-fpe0:-debug:...           <hi> parallel trace
157.
158\end{lstlisting}
159         };
160      \end{tikzpicture}
161%   \begin{tikzpicture}     
162%      \node[box](box){ \begin{tabbing}
163%         .\\
164%         \# The next line is just an example. Add your own line below or replace this line. \\
165%         \%host\_identifier \quad myhostname \quad lcmy \\
166%         \# \\
167%         \# The next block contains all informations for compiling the PALM code \\
168%         \textbf{\# This is the block for the optimized version}
169%         . \\
170%         \%fopts \quad \= -xS:-nbs:-convert:...   \qquad \= \textless hi\textgreater parallel \\
171%         \%lopts \> -xS:-nbs:-Vaxlib:-L:...        \> \textless hi\textgreater parallel \\
172%         . \\
173%         . \\
174%         \textbf{\# This is the block for the debug version}
175%         . \\
176%         \%fopts \quad \= -C:-check:nooutput\_conversion:-fpe0:-debug:-traceback:... \quad \textless hi\textgreater parallel \textbf{trace} \\
177%         \%lopts \quad \> -C:-check:nooutput\_conversion:-fpe0:-debug:-traceback:... \quad \textless hi\textgreater parallel \textbf{trace} \\
178%         . \\
179%        \end{tabbing}};
180%   \end{tikzpicture}
181   \end{itemize}
182\end{frame}
183
184% Folie 6
185\begin{frame}
186   \frametitle{Debugging Runtime Errors (III)}
187   \footnotesize   
188   \tikzstyle{yellow1} = [rectangle, draw, fill=yellow!100, text width=0.2\textwidth, font=\scriptsize]
189   \tikzstyle{green1} = [rectangle, draw, fill=green!75!black, text width=0.1\textwidth, font=\scriptsize]
190   \tikzstyle{green2} = [rectangle, draw, fill=green!75!black, text width=0.15\textwidth, font=\scriptsize]
191   \tikzstyle{green3} = [rectangle, draw, fill=green!75!black, text width=0.34\textwidth, font=\scriptsize]
192   \tikzstyle{green4} = [rectangle, draw, fill=green!75!black, text width=0.41\textwidth, font=\scriptsize]
193   \tikzstyle{line} = [draw, -]
194   \tikzstyle{arrow} = [draw, -latex']
195   \begin{itemize}
196      \item{If you now call mbuild, it will first compile for the optimized version, and then for the debug version.
197            The pre-compiled code will be put into different make depositories, one for each block:} 
198   \end{itemize} 
199   \vspace{-5mm}
200   \begin{center}
201   \begin{tikzpicture} [auto]
202      \uncover<2->{
203      \node[yellow1] (currentversion) {current\_version/};
204      \node[green1] (trunk) [right=0.5cm of currentversion] {trunk/};
205      \node[green2] (source) [right=0.7cm of trunk] {SOURCE/};
206      \node[font=\tiny, text width= 4cm] (files) [right=0.3cm of source] {\textbf{Makefile}\\ 
207            \textbf{advec\_particles.f90}\\
208            \textbf{advec\_s\_bc.f90}\\
209            ...};
210      \node[green3](depository_parallel) [below=0.5cm of trunk] [xshift = 2cm] {MAKE\_DEPOSITORY\_parallel/};}
211      \uncover<3->{
212      \node[green4](depository_parallel_trace) [below=0.5cm of depository_parallel] [xshift = 0.25cm] {MAKE\_DEPOSITORY\_parallel\_trace/};}
213     
214      \uncover<2->{
215      \path[line] (currentversion) -- (trunk);
216      \path[line] (trunk.east) -- (source.west);
217      \path[line] (trunk.east) -- (source.north west);
218      \path[line] (trunk.east) -- (source.south west);
219      \path[line] (currentversion.east) -- (depository_parallel.west);}
220      \uncover<3->{\path[line] (currentversion.east) -- (depository_parallel_trace.west);}
221      \uncover<2->{
222      \path[line, thick] (7.3,-0.3) -- (7.3,-1.01);
223      \path[arrow, thick] (7.3,-1.01) -- (depository_parallel.east);}
224      \uncover<3->{
225      \path[line, thick] (7.7,-0.3) -- (7.7,-2.02);
226      \path[arrow, thick] (7.7,-2.02) -- (depository_parallel_trace.east);}
227   \end{tikzpicture}
228   \end{center}
229   \begin{itemize}
230      \item<4->{The {\tt mrun} option {\tt -K} defines which version is used:}
231   \end{itemize}
232   \uncover<4->{
233   \begin{tabbing}
234      \qquad \={\tt mrun ... -K parallel ...} will use the optimized version \\
235             \> {\tt mrun ... -K \dq parallel trace\dq ...} will use the debug version
236   \end{tabbing}  }
237   \uncover<5->{\begin{center} \textbf{Enabling debug options slows down the execution speed significantly!} \end{center}}
238\end{frame}
239
240
241% Folie 7
242\begin{frame}
243   \frametitle{Debugging Runtime Errors (IV)}
244   \footnotesize
245   \begin{itemize}
246      \item<2->{There are still some cases where these options do not help or do not give enough information (e.g. concerning segmentation faults).}
247      \item<3->{There are two ways of handling these cases:}
248      \begin{itemize}
249         \footnotesize
250         \item<4->{the print/write debugger}
251         \item<5->{debuggers like dbx or GUI-based debuggers like "'totalview"' or "'Allinea DDT"'}
252         \item<6->{More detailed information about using Allinea DDT on the HLRN-III system is given at:
253               \par\medskip 
254               \textbf{\url{https://www.hlrn.de/home/view/System3/AllineaDDT}}\\
255               \ \\}
256         \item<7->{The \texttt{mrun} script will soon be adjusted to allow the use of the "'Allinea DDT"' debugger.}
257      \end{itemize}     
258   \end{itemize}
259\end{frame}
260
261\section{Print Statements}
262\subsection{Print Statements}
263
264% Folie 8
265\begin{frame}
266   \frametitle{Debugging With Print Statements (I)}
267   \tikzstyle{yellow1} = [rectangle, draw, fill=yellow!100, text width=0.2\textwidth, font=\scriptsize]
268   \tikzstyle{yellow2} = [rectangle, draw, fill=yellow!100, text width=0.1\textwidth, font=\scriptsize]
269   \tikzstyle{green1} = [rectangle, draw, fill=green!75!black, text width=0.25\textwidth, font=\scriptsize]
270   \tikzstyle{line} = [draw, -]
271   \scriptsize
272   \begin{itemize}
273      \item{ By adding appropriate print statements to the code
274            \begin{center} \begin{scriptsize}           
275            \begin{tabbing} 
276            \qquad\={\tt WRITE(9,*) 'now i am at \#1'} \\
277            \> {\tt CALL local\_flush( 9 )} \\
278            \> {\tt WRITE(9,*) 'now i am at \#2'} \\
279            \> {\tt CALL local\_flush( 9 )} \\
280            \> ... 
281            \end{tabbing}
282            \end{scriptsize} \end{center}
283            you can find the exact position (line number) within the code, where the error occurs.}
284      \par\medskip
285      \item<2->{Output can be found in files {\tt DEBUG\_0000}, {\tt DEBUG\_0001}, etc. in PALM's temporary working directory. You have to keep
286            this directory using  {\tt mrun}-option "'{\tt -B}"', because otherwise, the temporary working directory is deleted at the end of the run!}
287      \par\medskip
288      \item<5->{The name of PALM's temporary working directory is generated from the environment variable {\tt tmp\_user\_catalog} (see {\tt .mrun.config}), the username, and a random number:
289            \begin{center} \par\medskip           
290            /\textless {\tt tmp\_user\_catalog}\textgreater/\textless {\tt username}\textgreater .\textless {\tt random number}\textgreater \end{center}}           
291   \end{itemize}   
292   \par\medskip
293   \begin{center}
294   \begin{tikzpicture}[auto]
295      \uncover<3->{
296      \node[yellow2] (home) {\$HOME/};
297      \node[yellow2][right=0.5cm of home] (palm) {palm/};
298      \node[yellow1] (currentversion) [right=0.5cm of palm] {current\_version/};     
299      \node[font=\tiny, text width = 2cm] (working) [below = 0.05cm of currentversion] {\textbf{working directory \\ of PALM user}};}
300      \uncover<4->{
301      \node[green1] (tmp_user) [below left=0.6cm of home] [xshift=0.6cm] {/tmp\_user\_catalog/};
302      \node[font=\tiny, text width = 4cm] (temporary) [right=0.1cm of tmp_user] {\textbf{contains temporary directory created \\ (by {\tt mrun}) for each PALM run}};}
303     
304      \uncover<3->{
305      \path[line] (home) -- (palm);
306      \path[line] (palm) -- (currentversion);}     
307   \end{tikzpicture}
308   \end{center}
309\end{frame}
310
311% Folie 9
312\begin{frame}
313   \frametitle{Debugging With Print Statements (II)}
314   \small
315   \begin{itemize}
316      \item<2->{After having located the position, you can try to find out which variable may have caused the error: \\
317            \par\medskip
318            {\tt WRITE(9,*) ' a=',a,' b=',b, ...}} 
319   \end{itemize}
320   \par\bigskip
321   \uncover<3->{
322   \textbf{Very important:}
323   All output is buffered, i.e. it is not written to disk immediately. If the program aborts, the buffer
324   contents are lost, so the output of the last write statements is not available. You have to prevent this problem
325   by flushing the buffer after each print/write statement:   
326   \begin{tabbing}
327      \qquad \={\tt WRITE(9,*) '...'} \\
328      \> {\tt CALL local\_flush( 9 )}
329   \end{tabbing}}   
330\end{frame}
331
332\end{document}
Note: See TracBrowser for help on using the repository browser.