source: palm/trunk/TUTORIAL/SOURCE/debugging.tex @ 1272

Last change on this file since 1272 was 1232, checked in by kanani, 11 years ago

some changes for more generic version of tutorial

  • Property svn:keywords set to Id
File size: 13.2 KB
Line 
1% $Id: debugging.tex 1232 2013-09-20 11:56:41Z witha $
2\input{header_tmp.tex}
3%\input{../header_LECTURE.tex}
4
5\usepackage[utf8]{inputenc}
6\usepackage{ngerman}
7\usepackage{pgf}
8\usetheme{Dresden}
9\usepackage{subfigure}
10\usepackage{units}
11\usepackage{multimedia}
12\usepackage{hyperref}
13\newcommand{\event}[1]{\newcommand{\eventname}{#1}}
14\usepackage{xmpmulti}
15\usepackage{tikz}
16\usetikzlibrary{shapes,arrows,positioning,decorations.pathreplacing}
17\def\Tiny{\fontsize{4pt}{4pt}\selectfont}
18
19%---------- neue Pakete
20\usepackage{amsmath}
21\usepackage{amssymb}
22\usepackage{multicol}
23\usepackage{pdfcomment}
24\usepackage{xcolor}
25
26\institute{Institut für Meteorologie und Klimatologie, Leibniz Universität Hannover}
27\date{last update: \today}
28\event{PALM Seminar}
29\setbeamertemplate{navigation symbols}{}
30\setbeamersize{text margin left=.5cm,text margin right=.2cm}
31\setbeamertemplate{footline}
32  {%
33    \begin{beamercolorbox}[rightskip=-0.1cm]&
34     {\includegraphics[height=0.65cm]{imuk_logo.pdf}\hfill \includegraphics[height=0.65cm]{luh_logo.pdf}}
35    \end{beamercolorbox}
36    \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex,%
37      leftskip=.3cm,rightskip=0.3cm plus1fil]{title in head/foot}%
38      {\leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor} \hfill \eventname \hfill \insertframenumber \; / \inserttotalframenumber}%
39    \end{beamercolorbox}%
40%    \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot}%
41%    \end{beamercolorbox}
42  }%\logo{\includegraphics[width=0.3\textwidth]{luhimuk_logo.eps}}
43
44\title[PALM - Debugging]{PALM - Debugging}
45\author{Siegfried Raasch}
46
47% Notes:
48% each subsection gets a dot in the menu (vertically aligned).
49% each frame in a subsection gets a dot (horizontally aligned)
50\begin{document}
51% Folie 1
52\begin{frame}
53\titlepage
54\end{frame}
55
56\section{Principal Sources and First Steps}
57\subsection{Principal Sources and First Steps}
58
59% Folie 2
60\begin{frame}
61   \frametitle{Principal Sources of Errors}
62   \small
63   \textbf{PALM runs can give rise to a large variety of errors ...}
64   \par\medskip
65   Some of the main possible reasons for errors are:
66   \begin{itemize}
67      \item<2->{Missing or wrong options in the {\tt mrun} call}
68      \item<3->{Errors in the configuration file}
69      \item<4->{Errors in the NAMELIST parameter file}
70      \item<5->{Errors in the {\tt ssh}-installation (authentication), if a remote host is used for batch jobs}
71      \item<6->{FORTRAN errors in the user code (user-interface files)} 
72      \item<7->{PALM runtime errors due to}
73      \begin{itemize}
74         \item<7->{wrong parameter settings}
75         \item<8->{errors in the user code}
76         \item<9->{errors in PALM's default code, which have not been detected so far (e.g. because some parameter combinations have never been tried)}
77      \end{itemize}
78   \end{itemize}
79\end{frame}
80
81% Folie 3
82\begin{frame}
83   \frametitle{First Steps of Debugging}
84   \footnotesize
85   \begin{itemize}
86      \item<2->{\textbf{Find out the principal reason of the error(s):}}
87      \begin{itemize}
88         \item<3->{ Carefully analyze the job protocol file (or messages on the terminal, in case of interactive runs)
89               for any error messages or unexpected behaviour.}
90         \item<4->{In case of batch runs on a remote host, if the job protocol file is missing on the local host,
91               check whether you can find it in {\tt $\sim$/job\_queue} on the remote host.}
92         \item<5->{If the job has run into a time limit, no job protocol files or messages might be created at all (system dependent).}
93         \item<6->{Some typical errors which may occur during execution of mrun are automatically detected and displayed
94               by mrun in the job protocol or on the terminal: \\
95               \par\medskip
96               The respective error messages will begin with ``+++''.}
97         \item<7->{Compile-time and runtime error messages will only appear in the job protocol or on the terminal (in case of interactive runs).}
98      \end{itemize}
99   \end{itemize}
100\end{frame}
101
102\section{Runtime Errors}
103\subsection{Runtime Errors}
104
105% Folie 4
106\begin{frame}
107   \frametitle{Debugging Runtime Errors (I)}
108   \small
109   \begin{itemize}
110      \item{In case of runtime errors, the available information depends on the compiler and on the compiler settings.}
111      \item<2->{The default options for the Intel-compiler ({\tt-O3} for fast execution) give
112            almost no information, e.g. about the subroutine or the line number of the code where the error occurred.
113            Execution is even continued in case of floating point errors!}
114      \item<3->{Floating point error detection and traceback can be activated with compiler options
115            \par\medskip 
116            {\tt ifort -fpe0 -debug -traceback -O0 ...}}
117      \item<4->{The default {\tt .mrun.config.default} file contains an additional block with debug options. It can be used with the mrun call
118            \par\medskip
119            {\tt mrun ... -h \textless host\_identifier\textgreater \;-K \dq parallel trace\dq ...}}
120   \end{itemize}
121\end{frame}
122
123% Folie 5
124\begin{frame}
125   \frametitle{Debugging Runtime Errors (II)}
126   \small
127   \tikzstyle{box} = [rectangle, draw, text width=0.9\textwidth, font=\scriptsize]
128   \begin{itemize}
129      \item{The configuration file {\tt .mrun.config.default} looks like this:} 
130   \end{itemize}
131   \uncover<2->{
132   \begin{tikzpicture}     
133      \node[box](box){ \begin{tabbing} 
134         .\\
135         \# The next line is just an example. Add your own line below or replace this line. \\
136         \%host\_identifier \quad myhostname  \textless hi\textgreater \\
137         \# \\
138         \# The next block contains all informations for compiling the PALM code \\
139         \textbf{\# This is the block for the optimized version}
140         . \\
141         \%fopts \qquad \= -xS:-fpe0:-O3:-r8:... \qquad \textless hi\textgreater parallel \\
142         \%lopts \> -xS:-fpe0:-O3:-r8:... \qquad \textless hi\textgreater parallel \\
143         . \\
144         . \\
145         \textbf{\# This is the block for the debug version}
146         . \\
147         \%fopts \qquad \= -C:-fpe0:-debug:-traceback:-O0:... \qquad \textless hi\textgreater parallel \textbf{trace} \\
148         \%lopts \qquad \> -C:-fpe0:-debug:-traceback:-O0:... \qquad \textless hi\textgreater parallel \textbf{trace} \\
149         . \\
150        \end{tabbing}};
151   \end{tikzpicture}}
152\end{frame}
153
154% Folie 6
155\begin{frame}
156   \frametitle{Debugging Runtime Errors (III)}
157   \footnotesize   
158   \tikzstyle{yellow1} = [rectangle, draw, fill=yellow!100, text width=0.2\textwidth, font=\scriptsize]
159   \tikzstyle{green1} = [rectangle, draw, fill=green!75!black, text width=0.1\textwidth, font=\scriptsize]
160   \tikzstyle{green2} = [rectangle, draw, fill=green!75!black, text width=0.15\textwidth, font=\scriptsize]
161   \tikzstyle{green3} = [rectangle, draw, fill=green!75!black, text width=0.35\textwidth, font=\scriptsize]
162   \tikzstyle{green4} = [rectangle, draw, fill=green!75!black, text width=0.4\textwidth, font=\scriptsize]
163   \tikzstyle{line} = [draw, -]
164   \tikzstyle{arrow} = [draw, -latex']
165   \begin{itemize}
166      \item{If you now call mbuild, it will first compile for the optimized version, and then for the debug version.
167            The pre-compiled code will be put into different make depositories, one for each block:} 
168   \end{itemize} 
169   \begin{center}
170   \begin{tikzpicture} [auto]
171      \uncover<2->{
172      \node[yellow1] (currentversion) {current\_version/};
173      \node[green1] (trunk) [right=0.5cm of currentversion] {trunk/};
174      \node[green2] (source) [right=0.7cm of trunk] {SOURCE/};
175      \node[font=\tiny, text width= 4cm] (files) [right=0.3cm of source] {\textbf{Makefile}\\ 
176            \textbf{advec\_particles.f90}\\
177            \textbf{advec\_s\_bc.f90}\\
178            ...};
179      \node[green3](depository_parallel) [below=0.5cm of trunk] [xshift = 2cm] {MAKE\_DEPOSITORY\_parallel/};}
180      \uncover<3->{
181      \node[green4](depository_parallel_trace) [below=0.5cm of depository_parallel] [xshift = 0.25cm] {MAKE\_DEPOSITORY\_parallel\_trace/};}
182     
183      \uncover<2->{
184      \path[line] (currentversion) -- (trunk);
185      \path[line] (trunk.east) -- (source.west);
186      \path[line] (trunk.east) -- (source.north west);
187      \path[line] (trunk.east) -- (source.south west);
188      \path[line] (currentversion.east) -- (depository_parallel.west);}
189      \uncover<3->{\path[line] (currentversion.east) -- (depository_parallel_trace.west);}
190      \uncover<2->{
191      \path[line, thick] (7.3,-0.3) -- (7.3,-1.01);
192      \path[arrow, thick] (7.3,-1.01) -- (depository_parallel.east);}
193      \uncover<3->{
194      \path[line, thick] (7.7,-0.3) -- (7.7,-2.02);
195      \path[arrow, thick] (7.7,-2.02) -- (depository_parallel_trace.east);}
196   \end{tikzpicture}
197   \end{center}
198   \begin{itemize}
199      \item<4->{The {\tt mrun} option {\tt -K} defines which version is used:}
200   \end{itemize}
201   \uncover<4->{
202   \begin{tabbing}
203      \qquad \={\tt mrun ... -K parallel ...} will use the optimized version \\
204             \> {\tt mrun ... -K \dq parallel trace\dq ...} will use the debug version
205   \end{tabbing}  }
206   \uncover<5->{\begin{center} \textbf{Enabling debug options slows down the execution speed significantly!} \end{center}}
207\end{frame}
208
209
210% Folie 7
211\begin{frame}
212   \frametitle{Debugging Runtime Errors (IV)}
213   \footnotesize
214   \begin{itemize}
215      \item<2->{There are still some cases where these options do not help or do not give enough information (e.g. concerning segmentation faults).}
216      \item<3->{There are two ways of handling these cases:}
217      \begin{itemize}
218         \footnotesize
219         \item<4->{the print/write debugger}
220         \item<5->{debuggers like dbx or GUI-based debuggers like totalview \\(see \textbf{www.totalviewtech.com})}
221         \item<6->{more detailed information about using totalview on the HLRN system is given under:
222               \par\medskip 
223               \textbf{http://palm.muk.uni-hannover.de/wiki/doc/app/machine/hlrnII}}
224      \end{itemize}     
225   \end{itemize}
226\end{frame}
227
228\section{Print Statements}
229\subsection{Print Statements}
230
231% Folie 8
232\begin{frame}
233   \frametitle{Debugging With Print Statements (I)}
234   \tikzstyle{yellow1} = [rectangle, draw, fill=yellow!100, text width=0.2\textwidth, font=\scriptsize]
235   \tikzstyle{yellow2} = [rectangle, draw, fill=yellow!100, text width=0.1\textwidth, font=\scriptsize]
236   \tikzstyle{green1} = [rectangle, draw, fill=green!75!black, text width=0.25\textwidth, font=\scriptsize]
237   \tikzstyle{line} = [draw, -]
238   \scriptsize
239   \begin{itemize}
240      \item{ By adding appropriate print statements to the code
241            \begin{center} \begin{scriptsize}           
242            \begin{tabbing} 
243            \qquad\={\tt WRITE(9,*) 'now i am at \#1'} \\
244            \> {\tt CALL local\_flush( 9 )} \\
245            \> {\tt WRITE(9,*) 'now i am at \#2'} \\
246            \> {\tt CALL local\_flush( 9 )} \\
247            \> ... 
248            \end{tabbing}
249            \end{scriptsize} \end{center}
250            you can find the exact position (line number) within the code, where the error occurs.}
251      \par\medskip
252      \item<2->{Output can be found in files {\tt DEBUG\_0000}, {\tt DEBUG\_0001}, etc. in PALM's temporary working directory. You have to keep
253            this directory using the {\tt mrun} option ``{\tt -B}'', because otherwise the temporary working directory is deleted at the end of the run!}
254      \par\medskip
255      \item<5->{The name of PALM's temporary working directory is generated from the environment variable {\tt tmp\_user\_catalog} (see {\tt .mrun.config}), the username, and a random number:
256            \begin{center} \par\medskip           
257            /\textless {\tt tmp\_user\_catalog}\textgreater/\textless {\tt username}\textgreater .\textless {\tt random number}\textgreater \end{center}}           
258   \end{itemize}   
259   \par\medskip
260   \begin{center}
261   \begin{tikzpicture}[auto]
262      \uncover<3->{
263      \node[yellow2] (home) {\$HOME/};
264      \node[yellow2][right=0.5cm of home] (palm) {palm/};
265      \node[yellow1] (currentversion) [right=0.5cm of palm] {current\_version/};     
266      \node[font=\tiny, text width = 2cm] (working) [below = 0.05cm of currentversion] {\textbf{working directory \\ of PALM user}};}
267      \uncover<4->{
268      \node[green1] (tmp_user) [below left=0.6cm of home] [xshift=0.6cm] {/tmp\_user\_catalog/};
269      \node[font=\tiny, text width = 4cm] (temporary) [right=0.1cm of tmp_user] {\textbf{contains temporary directory created \\ (by {\tt mrun}) for each PALM run}};}
270     
271      \uncover<3->{
272      \path[line] (home) -- (palm);
273      \path[line] (palm) -- (currentversion);}     
274   \end{tikzpicture}
275   \end{center}
276\end{frame}
277
278% Folie 9
279\begin{frame}
280   \frametitle{Debugging With Print Statements (II)}
281   \small
282   \begin{itemize}
283      \item<2->{After having located the position, you can try to find out which variable may have caused the error: \\
284            \par\medskip
285            {\tt WRITE(9,*) ' a=',a,' b=',b, ...}} 
286   \end{itemize}
287   \par\bigskip
288   \uncover<3->{
289   \textbf{Very important:}
290   Every output is buffered, i.e. it will not be directly written to disc. In case of program aborts, the buffer
291   contents are lost, so the output of the last write statements is not available. You have to prevent this problem
292   by flushing the buffer after each print/write statement:   
293   \begin{tabbing}
294      \qquad \={\tt WRITE(9,*) '...'} \\
295      \> {\tt CALL local\_flush( 9 )}
296   \end{tabbing}}   
297\end{frame}
298
299\end{document}
Note: See TracBrowser for help on using the repository browser.