source: palm/trunk/SOURCE/cpu_statistics.f90 @ 1017

Last change on this file since 1017 was 1017, checked in by raasch, 9 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 9.7 KB
Line 
1 SUBROUTINE cpu_statistics
2
3!------------------------------------------------------------------------------!
4! Current revisions:
5! -----------------
6!
7!
8! Former revisions:
9! -----------------
10! $Id: cpu_statistics.f90 1017 2012-09-27 11:28:50Z raasch $
11!
12! 1015 2012-09-27 09:23:24Z raasch
13! output of accelerator board information
14!
15! 683 2011-02-09 14:25:15Z raasch
16! output of handling of ghostpoint exchange
17!
18! 622 2010-12-10 08:08:13Z raasch
19! output of handling of collective operations
20!
21! 222 2009-01-12 16:04:16Z letzel
22! Bugfix for nonparallel execution
23!
24! 197 2008-09-16 15:29:03Z raasch
25! Format adjustments in order to allow CPU# > 999,
26! data are collected from PE0 in an ordered sequence which seems to avoid
27! hanging of processes on SGI-ICE
28!
29! 82 2007-04-16 15:40:52Z raasch
30! Preprocessor directives for old systems removed
31!
32! RCS Log replace by Id keyword, revision history cleaned up
33!
34! Revision 1.13  2006/04/26 12:10:51  raasch
35! Output of number of threads per task, max = min in case of 1 PE
36!
37! Revision 1.1  1997/07/24 11:11:11  raasch
38! Initial revision
39!
40!
41! Description:
42! ------------
43! Analysis and output of the cpu-times measured. All PE results are collected
44! on PE0 in order to calculate the mean cpu-time over all PEs and other
45! statistics. The output is sorted according to the amount of cpu-time consumed
46! and output on PE0.
47!------------------------------------------------------------------------------!
48
49    USE cpulog
50    USE pegrid
51    USE control_parameters
52
53    IMPLICIT NONE
54
55    INTEGER    ::  i, ii(1), iii, lp, sender
56    REAL, SAVE ::  norm = 1.0
57    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
58    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points
59
60
61!
62!-- Compute cpu-times in seconds
63    log_point%mtime  = log_point%mtime  / norm
64    log_point%sum    = log_point%sum    / norm
65    log_point%vector = log_point%vector / norm
66    WHERE ( log_point%counts /= 0 )
67       log_point%mean = log_point%sum / log_point%counts
68    END WHERE
69
70
71!
72!-- Collect cpu-times from all PEs and calculate statistics
73    IF ( myid == 0 )  THEN
74!
75!--    Allocate and initialize temporary arrays needed for statistics
76       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
77                 pe_rms( SIZE( log_point ) ),                              &
78                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
79       pe_min = log_point%sum
80       pe_max = log_point%sum    ! need to be set in case of 1 PE
81       pe_rms = 0.0
82
83#if defined( __parallel )
84!
85!--    Receive data from all PEs
86       DO  i = 1, numprocs-1
87          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
88                         i, i, comm2d, status, ierr )
89          sender = status(MPI_SOURCE)
90          pe_log_points(:,sender) = pe_max
91       ENDDO
92       pe_log_points(:,0) = log_point%sum   ! Results from PE0
93!
94!--    Calculate mean of all PEs, store it on log_point%sum
95!--    and find minimum and maximum
96       DO  iii = 1, SIZE( log_point )
97          DO  i = 1, numprocs-1
98             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
99             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
100             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
101          ENDDO
102          log_point(iii)%sum = log_point(iii)%sum / numprocs
103!
104!--       Calculate rms
105          DO  i = 0, numprocs-1
106             pe_rms(iii) = pe_rms(iii) + ( &
107                                 pe_log_points(iii,i) - log_point(iii)%sum &
108                                         )**2
109          ENDDO
110          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
111       ENDDO
112    ELSE
113!
114!--    Send data to PE0 (pe_max is used as temporary storage to send
115!--    the data in order to avoid sending the data type log)
116       ALLOCATE( pe_max( SIZE( log_point ) ) )
117       pe_max = log_point%sum
118       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
119                      ierr )
120#endif
121
122    ENDIF
123
124!
125!-- Write cpu-times
126    IF ( myid == 0 )  THEN
127!
128!--    Re-store sums
129       ALLOCATE( sum( SIZE( log_point ) ) )
130       WHERE ( log_point%counts /= 0 )
131          sum = log_point%sum
132       ELSEWHERE
133          sum = -1.0
134       ENDWHERE
135
136!
137!--    Write cpu-times sorted by size
138       CALL check_open( 18 )
139#if defined( __parallel )
140       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
141                          numprocs * threads_per_task, pdims(1), pdims(2), &
142                          threads_per_task
143       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 108 )  num_acc_per_node
144       WRITE ( 18, 110 )
145#else
146       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
147                          numprocs * threads_per_task, 1, 1, &
148                          threads_per_task
149       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 109 )  num_acc_per_node
150       WRITE ( 18, 110 )
151#endif
152       DO
153          ii = MAXLOC( sum )
154          i = ii(1)
155          IF ( sum(i) /= -1.0 )  THEN
156             WRITE ( 18, 102 ) &
157                log_point(i)%place, log_point(i)%sum,                &
158                log_point(i)%sum / log_point(1)%sum * 100.0,         &
159                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
160             sum(i) = -1.0
161          ELSE
162             EXIT
163          ENDIF
164       ENDDO
165    ENDIF
166
167
168!
169!-- The same procedure again for the individual measurements.
170!
171!-- Compute cpu-times in seconds
172    log_point_s%mtime  = log_point_s%mtime  / norm
173    log_point_s%sum    = log_point_s%sum    / norm
174    log_point_s%vector = log_point_s%vector / norm
175    WHERE ( log_point_s%counts /= 0 )
176       log_point_s%mean = log_point_s%sum / log_point_s%counts
177    END WHERE
178
179!
180!-- Collect cpu-times from all PEs and calculate statistics
181#if defined( __parallel )
182!
183!-- Set barrier in order to avoid that PE0 receives log_point_s-data
184!-- while still busy with receiving log_point-data (see above)
185    CALL MPI_BARRIER( comm2d, ierr )   
186#endif
187    IF ( myid == 0 )  THEN
188!
189!--    Initialize temporary arrays needed for statistics
190       pe_min = log_point_s%sum
191       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
192       pe_rms = 0.0
193
194#if defined( __parallel )
195!
196!--    Receive data from all PEs
197       DO  i = 1, numprocs-1
198          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
199                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
200          sender = status(MPI_SOURCE)
201          pe_log_points(:,sender) = pe_max
202       ENDDO
203       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
204!
205!--    Calculate mean of all PEs, store it on log_point_s%sum
206!--    and find minimum and maximum
207       DO  iii = 1, SIZE( log_point )
208          DO  i = 1, numprocs-1
209             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
210             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
211             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
212          ENDDO
213          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
214!
215!--       Calculate rms
216          DO  i = 0, numprocs-1
217             pe_rms(iii) = pe_rms(iii) + ( &
218                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
219                                         )**2
220          ENDDO
221          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
222       ENDDO
223    ELSE
224!
225!--    Send data to PE0 (pe_max is used as temporary storage to send
226!--    the data in order to avoid sending the data type log)
227       pe_max = log_point_s%sum
228       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
229                      ierr )
230#endif
231
232    ENDIF
233
234!
235!-- Write cpu-times
236    IF ( myid == 0 )  THEN
237!
238!--    Re-store sums
239       WHERE ( log_point_s%counts /= 0 )
240          sum = log_point_s%sum
241       ELSEWHERE
242          sum = -1.0
243       ENDWHERE
244
245!
246!--    Write cpu-times sorted by size
247       WRITE ( 18, 101 )
248       DO
249          ii = MAXLOC( sum )
250          i = ii(1)
251          IF ( sum(i) /= -1.0 )  THEN
252             WRITE ( 18, 102 ) &
253                log_point_s(i)%place, log_point_s(i)%sum, &
254                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
255                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
256             sum(i) = -1.0
257          ELSE
258             EXIT
259          ENDIF
260       ENDDO
261
262!
263!--    Output of handling of MPI operations
264       IF ( collective_wait )  THEN
265          WRITE ( 18, 103 )
266       ELSE
267          WRITE ( 18, 104 )
268       ENDIF
269       IF ( synchronous_exchange )  THEN
270          WRITE ( 18, 105 )
271       ELSE
272          WRITE ( 18, 106 )
273       ENDIF
274
275!
276!--    Empty lines in order to create a gap to the results of the model
277!--    continuation runs
278       WRITE ( 18, 107 )
279
280!
281!--    Unit 18 is not needed anymore
282       CALL close_file( 18 )
283
284    ENDIF
285
286
287100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
288            &') tasks *',I5,' threads):')
289
290101 FORMAT (/'special measures:'/ &
291            &'-----------------------------------------------------------', &
292            &'--------------------')
293
294102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
295103 FORMAT (/'Barriers are set in front of collective operations')
296104 FORMAT (/'No barriers are set in front of collective operations')
297105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV')
298106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
299107 FORMAT (//)
300108 FORMAT ('Accelerator boards per node: ',I2)
301109 FORMAT ('Accelerator boards: ',I2)
302110 FORMAT ('----------------------------------------------------------',   &
303            &'------------'//&
304            &'place:                        mean        counts      min  ', &
305            &'     max       rms'/ &
306            &'                           sec.      %                sec. ', &
307            &'     sec.      sec.'/  &
308            &'-----------------------------------------------------------', &
309            &'-------------------')
310
311 END SUBROUTINE cpu_statistics
312
Note: See TracBrowser for help on using the repository browser.