source: palm/trunk/SOURCE/cpu_statistics.f90 @ 270

Last change on this file since 270 was 226, checked in by raasch, 16 years ago

preparations for the next release

  • Property svn:keywords set to Id
File size: 8.7 KB
Line 
1 SUBROUTINE cpu_statistics
2
3!------------------------------------------------------------------------------!
4! Actual revisions:
5! -----------------
6!
7!
8! Former revisions:
9! -----------------
10! $Id: cpu_statistics.f90 226 2009-02-02 07:39:34Z letzel $
11!
12! 222 2009-01-12 16:04:16Z letzel
13! Bugfix for nonparallel execution
14!
15! 197 2008-09-16 15:29:03Z raasch
16! Format adjustments in order to allow CPU# > 999,
17! data are collected from PE0 in an ordered sequence which seems to avoid
18! hanging of processes on SGI-ICE
19!
20! 82 2007-04-16 15:40:52Z raasch
21! Preprocessor directives for old systems removed
22!
23! RCS Log replace by Id keyword, revision history cleaned up
24!
25! Revision 1.13  2006/04/26 12:10:51  raasch
26! Output of number of threads per task, max = min in case of 1 PE
27!
28! Revision 1.1  1997/07/24 11:11:11  raasch
29! Initial revision
30!
31!
32! Description:
33! ------------
34! Analysis and output of the cpu-times measured. All PE results are collected
35! on PE0 in order to calculate the mean cpu-time over all PEs and other
36! statistics. The output is sorted according to the amount of cpu-time consumed
37! and output on PE0.
38!------------------------------------------------------------------------------!
39
40    USE cpulog
41    USE pegrid
42    USE control_parameters
43
44    IMPLICIT NONE
45
46    INTEGER    ::  i, ii(1), iii, lp, sender
47    REAL, SAVE ::  norm = 1.0
48    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
49    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points
50
51
52!
53!-- Compute cpu-times in seconds
54    log_point%mtime  = log_point%mtime  / norm
55    log_point%sum    = log_point%sum    / norm
56    log_point%vector = log_point%vector / norm
57    WHERE ( log_point%counts /= 0 )
58       log_point%mean = log_point%sum / log_point%counts
59    END WHERE
60
61
62!
63!-- Collect cpu-times from all PEs and calculate statistics
64    IF ( myid == 0 )  THEN
65!
66!--    Allocate and initialize temporary arrays needed for statistics
67       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
68                 pe_rms( SIZE( log_point ) ),                              &
69                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
70       pe_min = log_point%sum
71       pe_max = log_point%sum    ! need to be set in case of 1 PE
72       pe_rms = 0.0
73
74#if defined( __parallel )
75!
76!--    Receive data from all PEs
77       DO  i = 1, numprocs-1
78          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
79                         i, i, comm2d, status, ierr )
80          sender = status(MPI_SOURCE)
81          pe_log_points(:,sender) = pe_max
82       ENDDO
83       pe_log_points(:,0) = log_point%sum   ! Results from PE0
84!
85!--    Calculate mean of all PEs, store it on log_point%sum
86!--    and find minimum and maximum
87       DO  iii = 1, SIZE( log_point )
88          DO  i = 1, numprocs-1
89             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
90             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
91             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
92          ENDDO
93          log_point(iii)%sum = log_point(iii)%sum / numprocs
94!
95!--       Calculate rms
96          DO  i = 0, numprocs-1
97             pe_rms(iii) = pe_rms(iii) + ( &
98                                 pe_log_points(iii,i) - log_point(iii)%sum &
99                                         )**2
100          ENDDO
101          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
102       ENDDO
103    ELSE
104!
105!--    Send data to PE0 (pe_max is used as temporary storage to send
106!--    the data in order to avoid sending the data type log)
107       ALLOCATE( pe_max( SIZE( log_point ) ) )
108       pe_max = log_point%sum
109       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
110                      ierr )
111#endif
112
113    ENDIF
114
115!
116!-- Write cpu-times
117    IF ( myid == 0 )  THEN
118!
119!--    Re-store sums
120       ALLOCATE( sum( SIZE( log_point ) ) )
121       WHERE ( log_point%counts /= 0 )
122          sum = log_point%sum
123       ELSEWHERE
124          sum = -1.0
125       ENDWHERE
126
127!
128!--    Write cpu-times sorted by size
129       CALL check_open( 18 )
130#if defined( __parallel )
131       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
132                          numprocs * threads_per_task, pdims(1), pdims(2), &
133                          threads_per_task
134#else
135       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
136                          numprocs * threads_per_task, 1, 1, &
137                          threads_per_task
138#endif
139       DO
140          ii = MAXLOC( sum )
141          i = ii(1)
142          IF ( sum(i) /= -1.0 )  THEN
143             WRITE ( 18, 102 ) &
144                log_point(i)%place, log_point(i)%sum,                &
145                log_point(i)%sum / log_point(1)%sum * 100.0,         &
146                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
147             sum(i) = -1.0
148          ELSE
149             EXIT
150          ENDIF
151       ENDDO
152    ENDIF
153
154
155!
156!-- The same procedure again for the individual measurements.
157!
158!-- Compute cpu-times in seconds
159    log_point_s%mtime  = log_point_s%mtime  / norm
160    log_point_s%sum    = log_point_s%sum    / norm
161    log_point_s%vector = log_point_s%vector / norm
162    WHERE ( log_point_s%counts /= 0 )
163       log_point_s%mean = log_point_s%sum / log_point_s%counts
164    END WHERE
165
166!
167!-- Collect cpu-times from all PEs and calculate statistics
168#if defined( __parallel )
169!
170!-- Set barrier in order to avoid that PE0 receives log_point_s-data
171!-- while still busy with receiving log_point-data (see above)
172    CALL MPI_BARRIER( comm2d, ierr )   
173#endif
174    IF ( myid == 0 )  THEN
175!
176!--    Initialize temporary arrays needed for statistics
177       pe_min = log_point_s%sum
178       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
179       pe_rms = 0.0
180
181#if defined( __parallel )
182!
183!--    Receive data from all PEs
184       DO  i = 1, numprocs-1
185          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
186                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
187          sender = status(MPI_SOURCE)
188          pe_log_points(:,sender) = pe_max
189       ENDDO
190       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
191!
192!--    Calculate mean of all PEs, store it on log_point_s%sum
193!--    and find minimum and maximum
194       DO  iii = 1, SIZE( log_point )
195          DO  i = 1, numprocs-1
196             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
197             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
198             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
199          ENDDO
200          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
201!
202!--       Calculate rms
203          DO  i = 0, numprocs-1
204             pe_rms(iii) = pe_rms(iii) + ( &
205                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
206                                         )**2
207          ENDDO
208          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
209       ENDDO
210    ELSE
211!
212!--    Send data to PE0 (pe_max is used as temporary storage to send
213!--    the data in order to avoid sending the data type log)
214       pe_max = log_point_s%sum
215       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
216                      ierr )
217#endif
218
219    ENDIF
220
221!
222!-- Write cpu-times
223    IF ( myid == 0 )  THEN
224!
225!--    Re-store sums
226       WHERE ( log_point_s%counts /= 0 )
227          sum = log_point_s%sum
228       ELSEWHERE
229          sum = -1.0
230       ENDWHERE
231
232!
233!--    Write cpu-times sorted by size
234       WRITE ( 18, 101 )
235       DO
236          ii = MAXLOC( sum )
237          i = ii(1)
238          IF ( sum(i) /= -1.0 )  THEN
239             WRITE ( 18, 102 ) &
240                log_point_s(i)%place, log_point_s(i)%sum, &
241                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
242                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
243             sum(i) = -1.0
244          ELSE
245             EXIT
246          ENDIF
247       ENDDO
248
249!
250!--    Empty lines in order to create a gap to the results of the model
251!--    continuation runs
252       WRITE ( 18, 103 )
253
254!
255!--    Unit 18 is not needed anymore
256       CALL close_file( 18 )
257
258    ENDIF
259
260
261100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
262            &') tasks *',I5,' threads):'/ &
263             &'----------------------------------------------------------', &
264             &'------------'//&
265            &'place:                        mean        counts      min  ', &
266             &'     max       rms'/ &
267            &'                           sec.      %                sec. ', &
268             &'     sec.      sec.'/  &
269            &'-----------------------------------------------------------', &
270             &'-------------------')
271
272101 FORMAT (/'special measures:'/ &
273            &'-----------------------------------------------------------', &
274            &'--------------------')
275
276102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
277103 FORMAT (//)
278
279 END SUBROUTINE cpu_statistics
280
Note: See TracBrowser for help on using the repository browser.