% $Id: debugging.tex 1515 2015-01-02 11:35:51Z boeske $ \input{header_tmp.tex} %\input{../header_LECTURE.tex} \usepackage[utf8]{inputenc} \usepackage{ngerman} \usepackage{pgf} \usepackage{subfigure} \usepackage{units} \usepackage{multimedia} \usepackage{hyperref} \newcommand{\event}[1]{\newcommand{\eventname}{#1}} \usepackage{xmpmulti} \usepackage{tikz} \usetikzlibrary{shapes,arrows,positioning} \def\Tiny{\fontsize{4pt}{4pt}\selectfont} \usepackage{amsmath} \usepackage{amssymb} \usepackage{multicol} \usepackage{pdfcomment} \usepackage{graphicx} \usepackage{listings} \lstset{showspaces=false,language=fortran,basicstyle= \ttfamily,showstringspaces=false, captionpos=b,aboveskip=0pt,belowskip=0pt} \institute{Institute of Meteorology and Climatology, Leibniz Universität Hannover} \selectlanguage{english} \date{last update: \today} \event{PALM Seminar} \setbeamertemplate{navigation symbols}{} \setbeamersize{text margin left=.5cm,text margin right=.2cm} \setbeamertemplate{footline} { \begin{beamercolorbox}[rightskip=-0.1cm]& {\includegraphics[height=0.65cm]{imuk_logo.pdf}\hfill \includegraphics[height=0.65cm]{luh_logo.pdf}} \end{beamercolorbox} \begin{beamercolorbox}[ht=2.5ex,dp=1.125ex, leftskip=.3cm,rightskip=0.3cm plus1fil]{title in head/foot} {\leavevmode{\usebeamerfont{author in head/foot}\insertshortauthor} \hfill \eventname \hfill \insertframenumber \; / \inserttotalframenumber} \end{beamercolorbox} \begin{beamercolorbox}[colsep=1.5pt]{lower separation line foot} \end{beamercolorbox} } \title[PALM - Debugging]{PALM - Debugging} \author{PALM group} % Notes: % jede subsection bekommt einen punkt im menu (vertikal ausgerichtet. 
% jeder frame in einer subsection bekommt einen punkt (horizontal ausgerichtet) \begin{document} % Folie 1 \begin{frame} \titlepage \end{frame} \section{Principal Sources and First Steps} \subsection{Principal Sources and First Steps} % Folie 2 \begin{frame} \frametitle{Principal Sources of Errors} \small \textbf{PALM runs can give rise to a large variety of errors ...} \par\medskip Some of the main possible reasons for errors are: \begin{itemize} \item<2->{Missing or wrong options in the {\tt mrun} call} \item<3->{Errors in the configuration file} \item<4->{Errors in the NAMELIST parameter file} \item<5->{Errors in the {\tt ssh}-installation (authentication), if a remote host is used for batch jobs} \item<6->{FORTRAN errors in the user code (user-interface files)} \item<7->{PALM runtime errors due to} \begin{itemize} \item<7->{wrong parameter settings} \item<8->{errors in the user code} \item<9->{errors in PALM's default code, which have not been detected so far (e.g. because some parameter combinations have never been tried so far)} \end{itemize} \end{itemize} \end{frame} % Folie 3 \begin{frame} \frametitle{First Steps of Debugging} \footnotesize \begin{itemize} \item<2->{\textbf{Find out the principal reason for the error(s):}} \begin{itemize} \scriptsize \item<3->{ Carefully analyze the job protocol file (or messages on the terminal, in case of interactive runs) for any error messages or unexpected behaviour.} \vspace{1.5mm} \item<4->{In case of batch runs on a remote host, if the job protocol file is missing on the local host, check whether you can find it in {\tt $\sim$/job\_queue} on the remote host.} \vspace{1.5mm} \item<5->{If the job has run into a time limit, no job protocol files or messages might be created at all (system-dependent).} \vspace{1.5mm} \item<6->{Some typical errors which may occur during execution of mrun are automatically detected and displayed by mrun in the job protocol or on the terminal: \\ \par\medskip Respective error messages will begin 
with "'+++"'.} \vspace{1.5mm} \item<7->{Compile and runtime error messages will only appear in the job protocol or on the terminal (in case of interactive runs).} \vspace{1.5mm} \item<8->{In case of runtime errors, terminal messages may give first helpful hints about where errors are located.} \end{itemize} \end{itemize} \end{frame} \section{Runtime Errors} \subsection{Runtime Errors} % Folie 4 \begin{frame} \frametitle{Debugging Runtime Errors (I)} \small \begin{itemize} \item{In case of runtime errors, the available information depends on the compiler and on the compiler settings.} \item<2->{The default options for the Intel-compiler ({\tt-O3} for fast execution) give almost no information, e.g. about the subroutine or the line number of the code where the error occurred. Execution is even continued in case of floating point errors!} \item<3->{Floating point error detection and traceback can be activated with compiler options \par\medskip {\tt ifort -fpe0 -debug -traceback -O0 ...}} \item<4->{The default {\tt .mrun.config.default} file contains an additional block with debug options. It can be used with mrun-call \par\medskip {\tt mrun ... -h \textless host\_identifier\textgreater \;-K \dq parallel trace\dq ...}} \end{itemize} \end{frame} % Folie 5 \begin{frame}[fragile] \frametitle{Debugging Runtime Errors (II)} \small \tikzstyle{yellow} = [rectangle, draw, fill=yellow!30, text width=0.9\textwidth, font=\Tiny] \tikzstyle{box} = [rectangle, draw, text width=0.95\textwidth, font=\scriptsize] \begin{itemize} \item{The configuration file {\tt .mrun.config.default} looks like this:} \\ \ \\ \onslide<2-> \begin{tikzpicture} \node [yellow]{\begin{lstlisting} . # The next line is just an example. Add your own line below or replace this line. %host_identifier myhostname lcmy # # The next block contains all informations for compiling the PALM code # This is the block for the optimized version %fopts -I::-fpe0:-O3:-xHost:... parallel %lopts -L::-fpe0:-O3:-xHost:... 
parallel . . # This is the block for the debug version %fopts -C:-check:nooutput_conversion:-fpe0:-debug:... parallel trace %lopts -C:-check:nooutput_conversion:-fpe0:-debug:... parallel trace . \end{lstlisting} }; \end{tikzpicture} % \begin{tikzpicture} % \node[box](box){ \begin{tabbing} % .\\ % \# The next line is just an example. Add your own line below or replace this line. \\ % \%host\_identifier \quad myhostname \quad lcmy \\ % \# \\ % \# The next block contains all informations for compiling the PALM code \\ % \textbf{\# This is the block for the optimized version} % . \\ % \%fopts \quad \= -xS:-nbs:-convert:... \qquad \= \textless hi\textgreater parallel \\ % \%lopts \> -xS:-nbs:-Vaxlib:-L:... \> \textless hi\textgreater parallel \\ % . \\ % . \\ % \textbf{\# This is the block for the debug version} % . \\ % \%fopts \quad \= -C:-check:nooutput\_conversion:-fpe0:-debug:-traceback:... \quad \textless hi\textgreater parallel \textbf{trace} \\ % \%lopts \quad \> -C:-check:nooutput\_conversion:-fpe0:-debug:-traceback:... \quad \textless hi\textgreater parallel \textbf{trace} \\ % . \\ % \end{tabbing}}; % \end{tikzpicture} \end{itemize} \end{frame} % Folie 6 \begin{frame} \frametitle{Debugging Runtime Errors (III)} \footnotesize \tikzstyle{yellow1} = [rectangle, draw, fill=yellow!100, text width=0.2\textwidth, font=\scriptsize] \tikzstyle{green1} = [rectangle, draw, fill=green!75!black, text width=0.1\textwidth, font=\scriptsize] \tikzstyle{green2} = [rectangle, draw, fill=green!75!black, text width=0.15\textwidth, font=\scriptsize] \tikzstyle{green3} = [rectangle, draw, fill=green!75!black, text width=0.34\textwidth, font=\scriptsize] \tikzstyle{green4} = [rectangle, draw, fill=green!75!black, text width=0.41\textwidth, font=\scriptsize] \tikzstyle{line} = [draw, -] \tikzstyle{arrow} = [draw, -latex'] \begin{itemize} \item{If you now call mbuild, it will first compile for the optimized version, and then for the debug version. 
The pre-compiled code will be put into different make depositories, one for each block:} \end{itemize} \vspace{-5mm} \begin{center} \begin{tikzpicture} [auto] \uncover<2->{ \node[yellow1] (currentversion) {current\_version/}; \node[green1] (trunk) [right=0.5cm of currentversion] {trunk/}; \node[green2] (source) [right=0.7cm of trunk] {SOURCE/}; \node[font=\tiny, text width= 4cm] (files) [right=0.3cm of source] {\textbf{Makefile}\\ \textbf{advec\_particles.f90}\\ \textbf{advec\_s\_bc.f90}\\ ...}; \node[green3](depository_parallel) [below=0.5cm of trunk] [xshift = 2cm] {MAKE\_DEPOSITORY\_parallel/};} \uncover<3->{ \node[green4](depository_parallel_trace) [below=0.5cm of depository_parallel] [xshift = 0.25cm] {MAKE\_DEPOSITORY\_parallel\_trace/};} \uncover<2->{ \path[line] (currentversion) -- (trunk); \path[line] (trunk.east) -- (source.west); \path[line] (trunk.east) -- (source.north west); \path[line] (trunk.east) -- (source.south west); \path[line] (currentversion.east) -- (depository_parallel.west);} \uncover<3->{\path[line] (currentversion.east) -- (depository_parallel_trace.west);} \uncover<2->{ \path[line, thick] (7.3,-0.3) -- (7.3,-1.01); \path[arrow, thick] (7.3,-1.01) -- (depository_parallel.east);} \uncover<3->{ \path[line, thick] (7.7,-0.3) -- (7.7,-2.02); \path[arrow, thick] (7.7,-2.02) -- (depository_parallel_trace.east);} \end{tikzpicture} \end{center} \begin{itemize} \item<4->{The {\tt mrun} option {\tt -K} defines which version is used:} \end{itemize} \uncover<4->{ \begin{tabbing} \qquad \={\tt mrun ... -K parallel ...} will use the optimized version \\ \> {\tt mrun ... 
-K \dq parallel trace\dq ...} will use the debug version \end{tabbing} } \uncover<5->{\begin{center} \textbf{Enabling debug options slows down the execution speed significantly!} \end{center}} \end{frame} % Folie 7 \begin{frame} \frametitle{Debugging Runtime Errors (IV)} \footnotesize \begin{itemize} \item<2->{There are still some cases where these options do not help or do not give enough information (e.g. concerning segmentation faults).} \item<3->{There are two ways of handling these cases:} \begin{itemize} \footnotesize \item<4->{the print/write debugger} \item<5->{debuggers like dbx or GUI-based debuggers like "'totalview"' or "'Allinea DDT"'} \item<6->{More detailed information about using Allinea DDT on the HLRN-III system is given at: \par\medskip \textbf{https://www.hlrn.de/home/view/System3/AllineaDDT}\\ \ \\} \item<7->{The \texttt{mrun} script will soon be adjusted to allow the use of the "'Allinea DDT"' debugger.} \end{itemize} \end{itemize} \end{frame} \section{Print Statements} \subsection{Print Statements} % Folie 8 \begin{frame} \frametitle{Debugging With Print Statements (I)} \tikzstyle{yellow1} = [rectangle, draw, fill=yellow!100, text width=0.2\textwidth, font=\scriptsize] \tikzstyle{yellow2} = [rectangle, draw, fill=yellow!100, text width=0.1\textwidth, font=\scriptsize] \tikzstyle{green1} = [rectangle, draw, fill=green!75!black, text width=0.25\textwidth, font=\scriptsize] \tikzstyle{line} = [draw, -] \scriptsize \begin{itemize} \item{ By adding appropriate print statements to the code \begin{center} \begin{scriptsize} \begin{tabbing} \qquad\={\tt WRITE(9,*) 'now i am at \#1'} \\ \> {\tt CALL local\_flush( 9 )} \\ \> {\tt WRITE(9,*) 'now i am at \#2'} \\ \> {\tt CALL local\_flush( 9 )} \\ \> ... \end{tabbing} \end{scriptsize} \end{center} you can find the exact position (line number) within the code where the error occurs.} \par\medskip \item<2->{Output can be found in files {\tt DEBUG\_0000}, {\tt DEBUG\_0001}, etc. 
in PALM's temporary working directory. You have to keep this directory using {\tt mrun}-option "'{\tt -B}"', because otherwise, the temporary working directory is deleted at the end of the run!} \par\medskip \item<5->{The name of PALM's temporary working directory is generated from environment variable {\tt tmp\_user\_catalog} (see {\tt .mrun.config}), the username, and a random number: \begin{center} \par\medskip /\textless {\tt tmp\_user\_catalog}\textgreater/\textless {\tt username}\textgreater .\textless {\tt random number}\textgreater \end{center}} \end{itemize} \par\medskip \begin{center} \begin{tikzpicture}[auto] \uncover<3->{ \node[yellow2] (home) {\$HOME/}; \node[yellow2][right=0.5cm of home] (palm) {palm/}; \node[yellow1] (currentversion) [right=0.5cm of palm] {current\_version/}; \node[font=\tiny, text width = 2cm] (working) [below = 0.05cm of currentversion] {\textbf{working directory \\ of PALM user}};} \uncover<4->{ \node[green1] (tmp_user) [below left=0.6cm of home] [xshift=0.6cm] {/tmp\_user\_catalog/}; \node[font=\tiny, text width = 4cm] (temporary) [right=0.1cm of tmp_user] {\textbf{contains temporary directory created \\ (by {\tt mrun}) for each PALM run}};} \uncover<3->{ \path[line] (home) -- (palm); \path[line] (palm) -- (currentversion);} \end{tikzpicture} \end{center} \end{frame} % Folie 9 \begin{frame} \frametitle{Debugging With Print Statements (II)} \small \begin{itemize} \item<2->{After having located the position, you can try to find out which variable may have caused the error: \\ \par\medskip {\tt WRITE(9,*) ' a=',a,' b=',b, ...}} \end{itemize} \par\bigskip \uncover<3->{ \textbf{Very important:} Every output is buffered, i.e. it will not be written directly to disk. In case of program aborts, the buffer contents are lost, so the output of the last write statements is not available. 
You have to prevent this problem by flushing the buffer after each print/write statement: \begin{tabbing} \qquad \={\tt WRITE(9,*) '...'} \\ \> {\tt CALL local\_flush( 9 )} \end{tabbing}} \end{frame} \end{document}