source: palm/trunk/SOURCE/cpu_statistics.f90 @ 1060

Last change on this file since 1060 was 1037, checked in by raasch, 12 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 10.5 KB
 SUBROUTINE cpu_statistics

!--------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the terms
! of the GNU General Public License as published by the Free Software Foundation,
! either version 3 of the License, or (at your option) any later version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2012  Leibniz University Hannover
!--------------------------------------------------------------------------------!
!
! Current revisions:
! -----------------
!
!
! Former revisions:
! -----------------
! $Id: cpu_statistics.f90 1037 2012-10-22 14:10:22Z raasch $
!
! 1036 2012-10-22 13:43:42Z raasch
! code put under GPL (PALM 3.9)
!
! 1015 2012-09-27 09:23:24Z raasch
! output of accelerator board information
!
! 683 2011-02-09 14:25:15Z raasch
! output of handling of ghostpoint exchange
!
! 622 2010-12-10 08:08:13Z raasch
! output of handling of collective operations
!
! 222 2009-01-12 16:04:16Z letzel
! Bugfix for nonparallel execution
!
! 197 2008-09-16 15:29:03Z raasch
! Format adjustments in order to allow CPU# > 999,
! data are collected from PE0 in an ordered sequence which seems to avoid
! hanging of processes on SGI-ICE
!
! 82 2007-04-16 15:40:52Z raasch
! Preprocessor directives for old systems removed
!
! RCS Log replaced by Id keyword, revision history cleaned up
!
! Revision 1.13  2006/04/26 12:10:51  raasch
! Output of number of threads per task, max = min in case of 1 PE
!
! Revision 1.1  1997/07/24 11:11:11  raasch
! Initial revision
!
!
! Description:
! ------------
! Analysis and output of the measured cpu-times. The results of all PEs are
! collected on PE0 in order to calculate the mean cpu-time over all PEs as
! well as the minimum, maximum, and rms deviation. The output is sorted by
! the amount of cpu-time consumed and written by PE0.
!------------------------------------------------------------------------------!

    USE cpulog
    USE pegrid
    USE control_parameters

    IMPLICIT NONE

    INTEGER    ::  i, ii(1), iii, lp, sender
    REAL, SAVE ::  norm = 1.0
    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points


!
!-- Compute cpu-times in seconds
    log_point%mtime  = log_point%mtime  / norm
    log_point%sum    = log_point%sum    / norm
    log_point%vector = log_point%vector / norm
    WHERE ( log_point%counts /= 0 )
       log_point%mean = log_point%sum / log_point%counts
    END WHERE


!
!-- Collect cpu-times from all PEs and calculate statistics
    IF ( myid == 0 )  THEN
!
!--    Allocate and initialize temporary arrays needed for statistics
       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
                 pe_rms( SIZE( log_point ) ),                              &
                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
       pe_min = log_point%sum
       pe_max = log_point%sum    ! need to be set in case of 1 PE
       pe_rms = 0.0

#if defined( __parallel )
!
!--    Receive data from all PEs
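!--    (the data are received in an ordered sequence, with the sender id used
!--    as both source and tag, which seems to avoid processes hanging on
!--    SGI-ICE; see revision 197 above)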
       DO  i = 1, numprocs-1
          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
                         i, i, comm2d, status, ierr )
          sender = status(MPI_SOURCE)
          pe_log_points(:,sender) = pe_max
       ENDDO
       pe_log_points(:,0) = log_point%sum   ! Results from PE0
!
!--    Calculate mean of all PEs, store it on log_point%sum
!--    and find minimum and maximum
       DO  iii = 1, SIZE( log_point )
          DO  i = 1, numprocs-1
             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
          ENDDO
          log_point(iii)%sum = log_point(iii)%sum / numprocs
!
!--       Calculate rms
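!--       (root-mean-square deviation of the individual PE times from the
!--       mean over all PEs)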
          DO  i = 0, numprocs-1
             pe_rms(iii) = pe_rms(iii) + ( &
                                 pe_log_points(iii,i) - log_point(iii)%sum &
                                         )**2
          ENDDO
          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
       ENDDO
    ELSE
!
!--    Send data to PE0 (pe_max is used as temporary storage to send
!--    the data in order to avoid sending the data type log)
       ALLOCATE( pe_max( SIZE( log_point ) ) )
       pe_max = log_point%sum
       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
                      ierr )
#endif

    ENDIF

!
!-- Write cpu-times
    IF ( myid == 0 )  THEN
!
!--    Re-store sums
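!--    (log points that were never executed are flagged with -1.0 and are
!--    thereby excluded from the sorted output below)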
       ALLOCATE( sum( SIZE( log_point ) ) )
       WHERE ( log_point%counts /= 0 )
          sum = log_point%sum
       ELSEWHERE
          sum = -1.0
       ENDWHERE

!
!--    Write cpu-times sorted by size
       CALL check_open( 18 )
#if defined( __parallel )
       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
                          numprocs * threads_per_task, pdims(1), pdims(2), &
                          threads_per_task
       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 108 )  num_acc_per_node
       WRITE ( 18, 110 )
#else
       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
                          numprocs * threads_per_task, 1, 1, &
                          threads_per_task
       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 109 )  num_acc_per_node
       WRITE ( 18, 110 )
#endif
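!
!--    Output one line per log point: repeatedly pick the largest remaining
!--    value and mark it as done by setting it to -1.0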
       DO
          ii = MAXLOC( sum )
          i = ii(1)
          IF ( sum(i) /= -1.0 )  THEN
             WRITE ( 18, 102 ) &
                log_point(i)%place, log_point(i)%sum,                &
                log_point(i)%sum / log_point(1)%sum * 100.0,         &
                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
             sum(i) = -1.0
          ELSE
             EXIT
          ENDIF
       ENDDO
    ENDIF


!
!-- The same procedure again for the individual measurements.
!
!-- Compute cpu-times in seconds
    log_point_s%mtime  = log_point_s%mtime  / norm
    log_point_s%sum    = log_point_s%sum    / norm
    log_point_s%vector = log_point_s%vector / norm
    WHERE ( log_point_s%counts /= 0 )
       log_point_s%mean = log_point_s%sum / log_point_s%counts
    END WHERE

!
!-- Collect cpu-times from all PEs and calculate statistics
#if defined( __parallel )
!
!-- Set a barrier in order to prevent PE0 from receiving log_point_s data
!-- while it is still busy receiving log_point data (see above)
    CALL MPI_BARRIER( comm2d, ierr )
#endif
    IF ( myid == 0 )  THEN
!
!--    Initialize temporary arrays needed for statistics
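!--    (the arrays have already been allocated in the log_point analysis
!--    above and are reused here)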
       pe_min = log_point_s%sum
       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
       pe_rms = 0.0

#if defined( __parallel )
!
!--    Receive data from all PEs
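!--    (here the receive order does not matter, because each message is
!--    stored according to its actual sender id)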
       DO  i = 1, numprocs-1
          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
          sender = status(MPI_SOURCE)
          pe_log_points(:,sender) = pe_max
       ENDDO
       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
!
!--    Calculate mean of all PEs, store it on log_point_s%sum
!--    and find minimum and maximum
       DO  iii = 1, SIZE( log_point )
          DO  i = 1, numprocs-1
             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
          ENDDO
          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
!
!--       Calculate rms
          DO  i = 0, numprocs-1
             pe_rms(iii) = pe_rms(iii) + ( &
                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
                                         )**2
          ENDDO
          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
       ENDDO
    ELSE
!
!--    Send data to PE0 (pe_max is used as temporary storage to send
!--    the data in order to avoid sending the data type log)
       pe_max = log_point_s%sum
       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
                      ierr )
#endif

    ENDIF

!
!-- Write cpu-times
    IF ( myid == 0 )  THEN
!
!--    Re-store sums
       WHERE ( log_point_s%counts /= 0 )
          sum = log_point_s%sum
       ELSEWHERE
          sum = -1.0
       ENDWHERE

!
!--    Write cpu-times sorted by size
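!--    (the percentages refer to log_point(1)%sum, i.e. the same reference
!--    value as used for the measurements above)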
       WRITE ( 18, 101 )
       DO
          ii = MAXLOC( sum )
          i = ii(1)
          IF ( sum(i) /= -1.0 )  THEN
             WRITE ( 18, 102 ) &
                log_point_s(i)%place, log_point_s(i)%sum, &
                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
             sum(i) = -1.0
          ELSE
             EXIT
          ENDIF
       ENDDO

!
!--    Output of handling of MPI operations
       IF ( collective_wait )  THEN
          WRITE ( 18, 103 )
       ELSE
          WRITE ( 18, 104 )
       ENDIF
       IF ( synchronous_exchange )  THEN
          WRITE ( 18, 105 )
       ELSE
          WRITE ( 18, 106 )
       ENDIF

!
!--    Empty lines in order to create a gap to the results of the model
!--    continuation runs
       WRITE ( 18, 107 )

!
!--    Unit 18 is not needed anymore
       CALL close_file( 18 )

    ENDIF


100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
            &') tasks *',I5,' threads):')

101 FORMAT (/'special measures:'/ &
            &'-----------------------------------------------------------', &
            &'--------------------')

102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
103 FORMAT (/'Barriers are set in front of collective operations')
104 FORMAT (/'No barriers are set in front of collective operations')
105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRECV')
106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
107 FORMAT (//)
108 FORMAT ('Accelerator boards per node: ',I2)
109 FORMAT ('Accelerator boards: ',I2)
110 FORMAT ('----------------------------------------------------------',   &
            &'------------'//&
            &'place:                        mean        counts      min  ', &
            &'     max       rms'/ &
            &'                           sec.      %                sec. ', &
            &'     sec.      sec.'/  &
            &'-----------------------------------------------------------', &
            &'-------------------')

 END SUBROUTINE cpu_statistics
