Ignore:
Timestamp:
Aug 7, 2012 4:03:47 PM (12 years ago)
Author:
maronga
Message:

tutorial updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/TUTORIAL/SOURCE/parallelization.tex

    r945 r973  
    108108
    109109         \node (center) at (0,1) {};
    110          \onslide<2-> \node (Network) at (-3.5,1) [ellipse,fill=green!20] {Network};
     110         \onslide<2-> \node (Network) at (-3.5,1) [draw, ellipse,fill=green!20] {Network};
    111111         \node (dis_mem) at (-3.5,-1) [text width=0.28\textwidth] {\footnotesize \textbf{distributed} memory\\(Cray-T3E)};
    112112         \onslide<3-> \node (add_mem) at (3.5,1) [rectangle, draw] {addressable memory};
     
    116116         \onslide<6-> \node (clustered_systems) at (0,-3) [draw, text width=0.15\textwidth] {clustered systems};
    117117         \node (cs_info) at (0,-4.2) [text width=0.4\textwidth] {\footnotesize (IBM-Regatta, Linux-Cluster,
    118 NEC-SX, SGI-ICE, Cray-XT4)};
     118NEC-SX, SGI-ICE, Cray-XE6)};
    119119
    120120% Addressable memory node (big)
     
    138138% Addressable memory node (small)   
    139139         \onslide<4->
    140          \draw[->, thick] (1.5,0.2) -- (0.4,0.2) ;   
    141          \node at (0,0.2) [scale=0.2] {%
    142          \begin{tikzpicture}
     140           
     141         \node (small_node) at (-2,0.6) [scale=0.2] {%
     142            \begin{tikzpicture}
     143
     144               \node (add_mem_small) at (3.5,0.9) [ultra thick, rectangle, draw, minimum width=3cm] {};
     145
     146               \node (p1_small) at (2,-0.05) [ultra thick, draw,circle, scale=0.9] {};
     147               \node (p2_small) at (2.6,-0.05) [ultra thick, draw,circle, scale=0.9] {};
     148               \node (p3_small) at (3.2,-0.05) [ultra thick, draw,circle, scale=0.9] {};
     149               \node (p4_small) at (3.8,-0.05) [ultra thick, draw,circle, scale=0.9] {};
     150               \node (p5_small) at (4.4,-0.05) [ultra thick, draw,circle, scale=0.9] {};
     151               \node (p6_small) at (5,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    143152           
    144             \node (add_mem_small) at (3.5,0.9) [ultra thick, rectangle, draw, minimum width=3cm] {};
    145             \node (p1_small) at (2,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    146             \node (p2_small) at (2.6,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    147             \node (p3_small) at (3.2,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    148             \node (p4_small) at (3.8,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    149             \node (p5_small) at (4.4,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    150             \node (p6_small) at (5,-0.05) [ultra thick, draw,circle, scale=0.9] {};
    151          
    152             \draw[-, ultra thick] (add_mem_small.south) -- (3.5,0.4);
    153             \draw[-, ultra thick] (2,0.4) -- (5,0.4);
    154             \draw[-, ultra thick] (2,0.4) -- (p1_small);
    155             \draw[-, ultra thick] (2.6,0.4) -- (p2_small);
    156             \draw[-, ultra thick] (3.2,0.4) -- (p3_small);         
    157             \draw[-, ultra thick] (3.8,0.4) -- (p4_small);
    158             \draw[-, ultra thick] (4.4,0.4) -- (p5_small);         
    159             \draw[-, ultra thick] (5,0.4) -- (p6_small);
     153               \draw[-, ultra thick] (add_mem_small.south) -- (3.5,0.4);
     154               \draw[-, ultra thick] (2,0.4) -- (5,0.4);
     155               \draw[-, ultra thick] (2,0.4) -- (p1_small);
     156               \draw[-, ultra thick] (2.6,0.4) -- (p2_small);
     157               \draw[-, ultra thick] (3.2,0.4) -- (p3_small);         
     158               \draw[-, ultra thick] (3.8,0.4) -- (p4_small);
     159               \draw[-, ultra thick] (4.4,0.4) -- (p5_small);         
     160               \draw[-, ultra thick] (5,0.4) -- (p6_small);
     161               
     162               
     163            \end{tikzpicture}
     164            } ;
    160165           
    161            
    162          \end{tikzpicture}
    163          } ;
    164          
     166         \draw[->, thick] (1.5,0.2) -- (small_node) ; 
     167         \draw[-] (-2.7,0.75) -- (-2.3,0.725);
    165168         \onslide<5->
    166          \node (add_info) at (0,-0.1) [scale=0.9] {\scriptsize node};
     169         \node[below=-0.1cm of small_node] (add_info) [scale=0.9] {\scriptsize node};
    167170
    168171% Black Arrows
     
    177180% OpenMP Arrows         
    178181         \onslide<8-> \draw[->, ultra thick, color=yellow] (3.5,-2.6) -- (3.5,-1.5) ;
    179          \draw[->, ultra thick, color=yellow] (2.5,-2.8) -- (-2.5,0.7) ;
     182         \draw[->, ultra thick, color=yellow] (2.5,-2.8) -- (-2.0,0.1) ;
    180183         
    181184% Network decorations
     
    209212         \draw[-] (-2.45,1.0) -- (pr6);
    210213         \draw[-] (mem6) -- (pr6);   
    211                            
     214               
     215         \onslide<1->           
    212216      \end{tikzpicture}
    213217   \end{center}
     
    245249   \begin{itemize}
    246250      \item<10-> Alternatively, a 1D-decomposition along $x$ or $y$ may be used in case of slow networks, but this generally doesn't scale for processor numbers $>$ 256.
     251      \vspace{2mm}
    247252      \item<11-> Message passing is realized using MPI.
     253      \vspace{2mm}
    248254      \item<12-> OpenMP parallelization as well as mixed usage of OpenMP and
    249255MPI is also possible. (OpenMP tests and optimization are under way)
     
    279285         \onslide<5-> \includegraphics[width=0.8\textwidth]{parallelization_figures/fft.png} \end{center}
    280286         \vspace{-4mm}
    281          \textbf{Example: transpositions for solving the poisson equation}
     287         \textbf{Example: transpositions for solving the Poisson\\ \hspace{4em}equation}
    282288      \end{column}
    283289   \end{columns}   
     
    353359\begin{frame}
    354360   \frametitle{Virtual Processor Grid Used in PALM}   
    355    \footnotesize
    356    The processor grid and special data types are defined in file \texttt{init\_pegrid.f90}
     361   \scriptsize
     362   \vspace{2mm}
     363   The processor grid and special data types are defined in file \texttt{init\_pegrid.f90}\\
     364   \ \\
    357365   \begin{itemize}
    358366      \item<2-> PALM uses a two-dimensional virtual processor grid (in case of a 1D-decomposition, it has only one element along $y$). It is defined by a so-called communicator (here: \texttt{comm2d}):\\
    359       \scriptsize
     367      \tiny
     368      \vspace{1.5mm}
    360369      \quad \texttt{ndim = 2}\\
    361            \quad \texttt{pdims(1) = npex    ! \# of processors along x}\\
    362            \quad \texttt{pdims(2) = npey    ! \# of processors along y}\\
     370           \quad \texttt{pdims(1) = npex  \quad  ! \# of processors along x}\\
     371           \quad \texttt{pdims(2) = npey  \quad  ! \# of processors along y}\\
    363372           \quad \texttt{cyclic(1) = .TRUE.}\\
    364373           \quad \texttt{cyclic(2) = .TRUE.}\\
    365 
    366            \quad \texttt{CALL MPI\underline{\ }CART\underline{\ }CREATE( MPI\underline{\ }COMM\underline{\ }WORLD, ndim, pdims, cyclic,  \&}\\
    367            \quad \texttt{\hspace{10.5em} reorder, \textcolor{blue}{comm2d}, ierr )} 
     374      \ \\
     375           \quad \texttt{CALL MPI\underline{\ }CART\underline{\ }CREATE( MPI\underline{\ }COMM\underline{\ }WORLD, ndim, pdims, cyclic, reorder, \&}\\
     376           \quad \texttt{\hspace{10.5em} \textcolor{blue}{comm2d}, ierr )}
     377           \scriptsize
     378      \vspace{4mm}
    368379      \item<3-> The processor number (id) with respect to this processor grid, \texttt{myid}, is given by:\\
    369       \scriptsize
    370       \quad \texttt{CALL MPI\underline{\ }COMM\underline{\ }RANK( comm2d, \textcolor{blue}{myid}, ierr )}     
     380      \tiny
     381      \vspace{1.5mm}
     382      \quad \texttt{CALL MPI\underline{\ }COMM\underline{\ }RANK( comm2d, \textcolor{blue}{myid}, ierr )}   
     383      \scriptsize   
     384      \vspace{4mm}
    371385      \item<4-> The ids of the neighbouring PEs are determined by:\\
    372       \scriptsize
     386      \tiny
     387      \vspace{1.5mm}
    373388      \quad \texttt{CALL MPI\underline{\ }CART\underline{\ }SHIFT( comm2d, 0, 1, \textcolor{blue}{pleft},  \textcolor{blue}{pright}, ierr )}\\
    374       \quad \texttt{CALL MPI\underline{\ }CARD\underline{\ }SHIFT( comm2d, 1, 1, \textcolor{blue}{psouth}, \textcolor{blue}{pnorth}, ierr )}
     389      \quad \texttt{CALL MPI\underline{\ }CART\underline{\ }SHIFT( comm2d, 1, 1, \textcolor{blue}{psouth}, \textcolor{blue}{pnorth}, ierr )}\\
    375390   \end{itemize}
    376391\end{frame}
     
    383398            \item<1-> Ghost points are stored in additional array elements added at the horizontal boundaries of the subdomains, e.g.\\
    384399            \tiny
     400            \vspace{2mm}
    385401            \quad \texttt{u(:,:,nxl\textcolor{blue}{-ngl}), u(:,:,nxr\textcolor{blue}{+ngl})    ! left and right boundary}\\
    386402            \quad \texttt{u(:,nys\textcolor{blue}{-ngl},:), u(:,nyn\textcolor{blue}{+ngl},:)    ! south and north boundary}\\
    387             \hspace{3mm}
     403            \vspace{4mm}
    388404            \item<2-> \scriptsize The exchange of ghost points is done in file \texttt{exchange\underline{\ }horiz.f90}\\
    389405            \textbf{\underline{Simplified} example:} synchronous exchange of ghost points along $x$ ($yz$-planes, send left, receive right plane):\\
    390406            \tiny
     407            \vspace{2mm}
    391408            \quad \texttt{CALL MPI\underline{\ }SENDRECV( ar(nzb,nys-\textcolor{blue}{ngl},nxl),   ngp\underline{\ }yz, MPI\underline{\ }REAL, pleft,  0,}\\
    392409            \quad \texttt{\hspace{9.5em}ar(nzb,nys-\textcolor{blue}{ngl},nxr+1), ngp\underline{\ }yz, MPI\underline{\ }REAL, pright, 0,}\\
    393410            \quad \texttt{\hspace{9.5em}comm2d, status, ierr )}\\
    394             \hspace{3mm}
     411            \vspace{4mm}
    395412            \item<3-> \scriptsize In the real code special MPI data types (vectors) are defined for exchange of $yz$/$xz$-planes for performance reasons and because array elements to be exchanged are not consecutively stored in memory for $xz$-planes:\\
    396413            \tiny
     414            \vspace{2mm}
    397415            \quad \texttt{ngp\underline{\ }yz(0) = (nzt - nzb + 2) * (nyn - nys + 1 + 2 * \textcolor{blue}{ngl} )}\\
    398416            \quad \texttt{CALL MPI\underline{\ }TYPE\underline{\ }VECTOR( \textcolor{blue}{ngl}, ngp\underline{\ }yz(0), ngp\underline{\ }yz(0), MPI\underline{\ }REAL, type\underline{\ }yz(0), ierr )}\\
    399             \quad \texttt{CALL MPI\underline{\ }TYPE\underline{\ }COMMIT( type\underline{\ }xz(0), ierr )   ! see file init\underline{\ }pegrid.f90}\\
     417            \quad \texttt{CALL MPI\underline{\ }TYPE\underline{\ }COMMIT( type\underline{\ }yz(0), ierr )   ! see file init\underline{\ }pegrid.f90}\\
    400418            \ \\
    401419            \quad \texttt{CALL MPI\underline{\ }SENDRECV( ar(nzb,nys-ngl,nxl), type\underline{\ }yz(grid\underline{\ }level), MPI\underline{\ }REAL, pleft, 0, ...}\\
     
    431449   \frametitle{Parallel I/O}
    432450   \scriptsize
     451   \vspace{-2mm}
    433452   \begin{columns}[T]
    434453      \begin{column}{1.1\textwidth}
     
    449468         \onslide<4-> \textbf{General comment:}
    450469         \begin{itemize}
    451             \item Parallel I/O on a large number of files ($>$1000) currently may cause severe file system problems (e.g. on Lustre file systems). A workaround for this problem will\\ be available soon.
     470            \item Parallel I/O on a large number of files ($>$1000) currently may cause severe file system problems (e.g. on Lustre file systems).\\ \textbf{Workaround:} reduce the maximum number of parallel I/O streams\\ \hspace{5.75em}(see \texttt{mrun}-options)
    452471         \end{itemize}
    453472      \end{column}
Note: See TracChangeset for help on using the changeset viewer.