source: palm/trunk/SOURCE/cpu_statistics.f90 @ 1

Last change on this file since 1 was 1, checked in by raasch, 17 years ago

Initial repository layout and content

File size: 9.0 KB
Line 
1 SUBROUTINE cpu_statistics
2
3!-------------------------------------------------------------------------------!
4! Actual revisions:
5! -----------------
6!
7!
8! Former revisions:
9! -----------------
10! $Log: cpu_statistics.f90,v $
11! Revision 1.13  2006/04/26 12:10:51  raasch
12! Output of number of threads per task, max = min in case of 1 PE
13!
14! Revision 1.12  2003/03/16 09:30:22  raasch
15! Two underscores (_) are placed in front of all define-strings
16!
17! Revision 1.11  2002/12/19 14:12:08  raasch
18! PE results are collected on PE0 in order to calculate mean cpu statistics.
19! Output format changed.
20!
21! Revision 1.10  2001/03/30 07:01:02  raasch
22! Translation of remaining German identifiers (variables, subroutines, etc.)
23!
24! Revision 1.9  2001/01/22 05:49:49  raasch
25! Module test_variables removed
26!
27! Revision 1.8  2001/01/02 17:25:23  raasch
28! Unit 18 closed at the end of the subroutine
29!
30! Revision 1.7  2000/12/20 10:10:26  letzel
31! All comments translated into English.
32!
33! Revision 1.1  1997/07/24 11:11:11  raasch
34! Initial revision
35!
36!
37! Description:
38! ------------
39! Analysis and output of the cpu-times measured. All PE results are collected
40! on PE0 in order to calculate the mean cpu-time over all PEs and other
41! statistics. The output is sorted according to the amount of cpu-time consumed
42! and output on PE0.
43!-------------------------------------------------------------------------------!
44
45    USE cpulog
46    USE pegrid
47    USE control_parameters
48
49    IMPLICIT NONE
50
51    INTEGER    ::  i, ii(1), iii, lp, sender
52    REAL, SAVE ::  norm = 1.0
53    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
54    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points
55
56
57!#if defined( __hpmuk )  &&  ! defined( __chinook )
58!    norm = clock_ticks_per_second()
59!#endif
60
61!
62!-- Compute cpu-times in seconds
63    log_point%mtime  = log_point%mtime  / norm
64    log_point%sum    = log_point%sum    / norm
65    log_point%vector = log_point%vector / norm
66    WHERE ( log_point%counts /= 0 )
67       log_point%mean = log_point%sum / log_point%counts
68    END WHERE
69
70
71!
72!-- Collect cpu-times from all PEs and calculate statistics
73    IF ( myid == 0 )  THEN
74!
75!--    Allocate and initialize temporary arrays needed for statistics
76       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
77                 pe_rms( SIZE( log_point ) ),                              &
78                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
79       pe_min = log_point%sum
80       pe_max = log_point%sum    ! need to be set in case of 1 PE
81       pe_rms = 0.0
82
83#if defined( __parallel )
84!
85!--    Receive data from all PEs
86       DO  i = 1, numprocs-1
87          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
88                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
89          sender = status(MPI_SOURCE)
90          pe_log_points(:,sender) = pe_max
91       ENDDO
92       pe_log_points(:,0) = log_point%sum   ! Results from PE0
93!
94!--    Calculate mean of all PEs, store it on log_point%sum
95!--    and find minimum and maximum
96       DO  iii = 1, SIZE( log_point )
97          DO  i = 1, numprocs-1
98             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
99             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
100             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
101          ENDDO
102          log_point(iii)%sum = log_point(iii)%sum / numprocs
103!
104!--       Calculate rms
105          DO  i = 0, numprocs-1
106!             IF ( log_point(iii)%place == 'run_control' )  THEN
107!                PRINT*, 'pe_rms=',pe_rms(iii),' plp=',pe_log_points(iii,i), &
108!                        ' lps=',log_point(iii)%sum
109!             ENDIF
110             pe_rms(iii) = pe_rms(iii) + ( &
111                                 pe_log_points(iii,i) - log_point(iii)%sum &
112                                         )**2
113          ENDDO
114          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
115       ENDDO
116    ELSE
117!
118!--    Send data to PE0 (pe_max is used as temporary storage to send
119!--    the data in order to avoid sending the data type log)
120       ALLOCATE( pe_max( SIZE( log_point ) ) )
121       pe_max = log_point%sum
122       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
123                      ierr )
124#endif
125
126    ENDIF
127
128!
129!-- Write cpu-times
130    IF ( myid == 0 )  THEN
131!
132!--    Re-store sums
133       ALLOCATE( sum( SIZE( log_point ) ) )
134       WHERE ( log_point%counts /= 0 )
135          sum = log_point%sum
136       ELSEWHERE
137          sum = -1.0
138       ENDWHERE
139
140!
141!--    Write cpu-times sorted by size
142       CALL check_open( 18 )
143       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
144                          numprocs * threads_per_task, numprocs, &
145                          threads_per_task
146       DO
147          ii = MAXLOC( sum )
148          i = ii(1)
149          IF ( sum(i) /= -1.0 )  THEN
150             WRITE ( 18, 102 ) &
151                log_point(i)%place, log_point(i)%sum,                &
152                log_point(i)%sum / log_point(1)%sum * 100.0,         &
153                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
154             sum(i) = -1.0
155          ELSE
156             EXIT
157          ENDIF
158       ENDDO
159    ENDIF
160
161
162!
163!-- The same procedure again for the individual measurements.
164!
165!-- Compute cpu-times in seconds
166    log_point_s%mtime  = log_point_s%mtime  / norm
167    log_point_s%sum    = log_point_s%sum    / norm
168    log_point_s%vector = log_point_s%vector / norm
169    WHERE ( log_point_s%counts /= 0 )
170       log_point_s%mean = log_point_s%sum / log_point_s%counts
171    END WHERE
172
173!
174!-- Collect cpu-times from all PEs and calculate statistics
175#if defined( __parallel )
176!
177!-- Set barrier in order to avoid that PE0 receives log_point_s-data
178!-- while still busy with receiving log_point-data (see above)
179    CALL MPI_BARRIER( comm2d, ierr )   
180#endif
181    IF ( myid == 0 )  THEN
182!
183!--    Initialize temporary arrays needed for statistics
184       pe_min = log_point_s%sum
185       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
186       pe_rms = 0.0
187
188#if defined( __parallel )
189!
190!--    Receive data from all PEs
191       DO  i = 1, numprocs-1
192          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
193                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
194          sender = status(MPI_SOURCE)
195          pe_log_points(:,sender) = pe_max
196       ENDDO
197       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
198!
199!--    Calculate mean of all PEs, store it on log_point_s%sum
200!--    and find minimum and maximum
201       DO  iii = 1, SIZE( log_point )
202          DO  i = 1, numprocs-1
203             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
204             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
205             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
206          ENDDO
207          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
208!
209!--       Calculate rms
210          DO  i = 0, numprocs-1
211             pe_rms(iii) = pe_rms(iii) + ( &
212                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
213                                         )**2
214          ENDDO
215          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
216       ENDDO
217    ELSE
218!
219!--    Send data to PE0 (pe_max is used as temporary storage to send
220!--    the data in order to avoid sending the data type log)
221       pe_max = log_point_s%sum
222       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
223                      ierr )
224#endif
225
226    ENDIF
227
228!
229!-- Write cpu-times
230    IF ( myid == 0 )  THEN
231!
232!--    Re-store sums
233       WHERE ( log_point_s%counts /= 0 )
234          sum = log_point_s%sum
235       ELSEWHERE
236          sum = -1.0
237       ENDWHERE
238
239!
240!--    Write cpu-times sorted by size
241       WRITE ( 18, 101 )
242       DO
243          ii = MAXLOC( sum )
244          i = ii(1)
245          IF ( sum(i) /= -1.0 )  THEN
246             WRITE ( 18, 102 ) &
247                log_point_s(i)%place, log_point_s(i)%sum, &
248                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
249                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
250             sum(i) = -1.0
251          ELSE
252             EXIT
253          ENDIF
254       ENDDO
255
256!
257!--    Empty lines in order to create a gap to the results of the model
258!--    continuation runs
259       WRITE ( 18, 103 )
260
261!
262!--    Unit 18 is not needed anymore
263       CALL close_file( 18 )
264
265    ENDIF
266
267
268100 FORMAT (A/11('-')//'CPU measures for ',I3,' PEs (',I3,' tasks *',I3,    &
269            &' threads):'/ &
270             &'--------------------------------------------------'//        &
271            &'place:                        mean        counts      min  ', &
272             &'     max       rms'/ &
273            &'                           sec.      %                sec. ', &
274             &'     sec.      sec.'/  &
275            &'-----------------------------------------------------------', &
276             &'-------------------')
277
278101 FORMAT (/'special measures:'/ &
279            &'-----------------------------------------------------------', &
280            &'--------------------')
281
282102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
283103 FORMAT (//)
284
285 END SUBROUTINE cpu_statistics
286
Note: See TracBrowser for help on using the repository browser.