source: palm/trunk/SOURCE/cpu_statistics.f90 @ 1036

Last change on this file: r1036, checked in by raasch ("code has been put under the GNU General Public License (v3)")

 SUBROUTINE cpu_statistics

!--------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the terms
! of the GNU General Public License as published by the Free Software Foundation,
! either version 3 of the License, or (at your option) any later version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2012  Leibniz University Hannover
!--------------------------------------------------------------------------------!
!
! Current revisions:
! -----------------
!
!
! Former revisions:
! -----------------
! $Id: cpu_statistics.f90 1036 2012-10-22 13:43:42Z raasch $
!
! 1015 2012-09-27 09:23:24Z raasch
! output of accelerator board information
!
! 683 2011-02-09 14:25:15Z raasch
! output of handling of ghostpoint exchange
!
! 622 2010-12-10 08:08:13Z raasch
! output of handling of collective operations
!
! 222 2009-01-12 16:04:16Z letzel
! Bugfix for nonparallel execution
!
! 197 2008-09-16 15:29:03Z raasch
! Format adjustments in order to allow CPU# > 999,
! data are collected on PE0 in an ordered sequence, which seems to avoid
! hanging of processes on SGI-ICE
!
! 82 2007-04-16 15:40:52Z raasch
! Preprocessor directives for old systems removed
!
! RCS Log replaced by Id keyword, revision history cleaned up
!
! Revision 1.13  2006/04/26 12:10:51  raasch
! Output of number of threads per task, max = min in case of 1 PE
!
! Revision 1.1  1997/07/24 11:11:11  raasch
! Initial revision
!
!
! Description:
! ------------
! Analysis and output of the measured cpu-times. The results of all PEs are
! collected on PE0 in order to calculate the mean cpu-time over all PEs as
! well as further statistics. The output is sorted by the amount of cpu-time
! consumed and is written by PE0.
!------------------------------------------------------------------------------!

    USE cpulog
    USE pegrid
    USE control_parameters

    IMPLICIT NONE

    INTEGER    ::  i, ii(1), iii, sender
    REAL, SAVE ::  norm = 1.0
    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points


!
!-- Compute cpu-times in seconds
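!-- (the SAVEd factor norm converts the raw timer values into seconds; with
!-- its default of 1.0 the timers are assumed to deliver seconds already)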
    log_point%mtime  = log_point%mtime  / norm
    log_point%sum    = log_point%sum    / norm
    log_point%vector = log_point%vector / norm
    WHERE ( log_point%counts /= 0 )
       log_point%mean = log_point%sum / log_point%counts
    END WHERE


!
!-- Collect cpu-times from all PEs and calculate statistics
    IF ( myid == 0 )  THEN
!
!--    Allocate and initialize temporary arrays needed for statistics
       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
                 pe_rms( SIZE( log_point ) ),                              &
                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
       pe_min = log_point%sum
       pe_max = log_point%sum    ! needs to be set in case of 1 PE
       pe_rms = 0.0

#if defined( __parallel )
!
!--    Receive data from all PEs
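!--    Each PE i sends its sums with tag i; receiving explicitly with source i
!--    and tag i collects the data in an ordered sequence, which, according to
!--    the r197 note in the history above, avoids hanging processes on
!--    SGI-ICE. pe_max merely serves as receive buffer here.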
       DO  i = 1, numprocs-1
          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
                         i, i, comm2d, status, ierr )
          sender = status(MPI_SOURCE)
          pe_log_points(:,sender) = pe_max
       ENDDO
       pe_log_points(:,0) = log_point%sum   ! Results from PE0
!
!--    Calculate the mean over all PEs, store it in log_point%sum,
!--    and find minimum and maximum
       DO  iii = 1, SIZE( log_point )
          DO  i = 1, numprocs-1
             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
          ENDDO
          log_point(iii)%sum = log_point(iii)%sum / numprocs
!
!--       Calculate rms
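!--       (the root-mean-square deviation of the individual PE times from the
!--       mean, i.e. their standard deviation over all PEs)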
          DO  i = 0, numprocs-1
             pe_rms(iii) = pe_rms(iii) + ( &
                                 pe_log_points(iii,i) - log_point(iii)%sum &
                                         )**2
          ENDDO
          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
       ENDDO
    ELSE
!
!--    Send data to PE0 (pe_max is used as temporary storage to send the
!--    data, in order to avoid sending the derived type directly)
       ALLOCATE( pe_max( SIZE( log_point ) ) )
       pe_max = log_point%sum
       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
                      ierr )
#endif

    ENDIF

!
!-- Write cpu-times
    IF ( myid == 0 )  THEN
!
!--    Re-store sums
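!--    Measures that were never invoked (counts == 0) are flagged with -1.0,
!--    so that the output loop below skips them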
       ALLOCATE( sum( SIZE( log_point ) ) )
       WHERE ( log_point%counts /= 0 )
          sum = log_point%sum
       ELSEWHERE
          sum = -1.0
       ENDWHERE

!
!--    Write cpu-times sorted by size
       CALL check_open( 18 )
#if defined( __parallel )
       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
                          numprocs * threads_per_task, pdims(1), pdims(2), &
                          threads_per_task
       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 108 )  num_acc_per_node
       WRITE ( 18, 110 )
#else
       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
                          numprocs * threads_per_task, 1, 1, &
                          threads_per_task
       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 109 )  num_acc_per_node
       WRITE ( 18, 110 )
#endif
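!
!--    Entries are written in descending order of consumed time: MAXLOC
!--    repeatedly picks the largest remaining sum, which is then flagged with
!--    -1.0. Percentages refer to log_point(1)%sum, which is assumed to hold
!--    the total measured time of the run.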
       DO
          ii = MAXLOC( sum )
          i = ii(1)
          IF ( sum(i) /= -1.0 )  THEN
             WRITE ( 18, 102 ) &
                log_point(i)%place, log_point(i)%sum,                &
                log_point(i)%sum / log_point(1)%sum * 100.0,         &
                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
             sum(i) = -1.0
          ELSE
             EXIT
          ENDIF
       ENDDO
    ENDIF


!
!-- The same procedure again for the individual measurements.
!
!-- Compute cpu-times in seconds
    log_point_s%mtime  = log_point_s%mtime  / norm
    log_point_s%sum    = log_point_s%sum    / norm
    log_point_s%vector = log_point_s%vector / norm
    WHERE ( log_point_s%counts /= 0 )
       log_point_s%mean = log_point_s%sum / log_point_s%counts
    END WHERE

!
!-- Collect cpu-times from all PEs and calculate statistics
#if defined( __parallel )
!
!-- Set a barrier in order to prevent PE0 from receiving log_point_s data
!-- while it is still busy receiving log_point data (see above)
    CALL MPI_BARRIER( comm2d, ierr )
#endif
    IF ( myid == 0 )  THEN
!
!--    Initialize temporary arrays needed for statistics
       pe_min = log_point_s%sum
       pe_max = log_point_s%sum    ! needs to be set in case of 1 PE
       pe_rms = 0.0

#if defined( __parallel )
!
!--    Receive data from all PEs
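!--    In contrast to the collection above, messages are accepted in arbitrary
!--    order here (MPI_ANY_SOURCE, MPI_ANY_TAG); status(MPI_SOURCE) assigns
!--    each message to its sender. SIZE( log_point ) is used as message
!--    length, which assumes that log_point and log_point_s have the same size.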
       DO  i = 1, numprocs-1
          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
          sender = status(MPI_SOURCE)
          pe_log_points(:,sender) = pe_max
       ENDDO
       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
!
!--    Calculate the mean over all PEs, store it in log_point_s%sum,
!--    and find minimum and maximum
       DO  iii = 1, SIZE( log_point )
          DO  i = 1, numprocs-1
             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
          ENDDO
          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
!
!--       Calculate rms
          DO  i = 0, numprocs-1
             pe_rms(iii) = pe_rms(iii) + ( &
                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
                                         )**2
          ENDDO
          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
       ENDDO
    ELSE
!
!--    Send data to PE0 (pe_max is used as temporary storage to send the
!--    data, in order to avoid sending the derived type directly)
       pe_max = log_point_s%sum
       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
                      ierr )
#endif

    ENDIF

!
!-- Write cpu-times
    IF ( myid == 0 )  THEN
!
!--    Re-store sums
       WHERE ( log_point_s%counts /= 0 )
          sum = log_point_s%sum
       ELSEWHERE
          sum = -1.0
       ENDWHERE

!
!--    Write cpu-times sorted by size
       WRITE ( 18, 101 )
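!
!--    As above, entries are written in descending order of consumed time;
!--    the percentages of these special measures also refer to the total time
!--    stored in log_point(1)%sum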
       DO
          ii = MAXLOC( sum )
          i = ii(1)
          IF ( sum(i) /= -1.0 )  THEN
             WRITE ( 18, 102 ) &
                log_point_s(i)%place, log_point_s(i)%sum, &
                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
             sum(i) = -1.0
          ELSE
             EXIT
          ENDIF
       ENDDO

!
!--    Output of the handling of MPI operations
       IF ( collective_wait )  THEN
          WRITE ( 18, 103 )
       ELSE
          WRITE ( 18, 104 )
       ENDIF
       IF ( synchronous_exchange )  THEN
          WRITE ( 18, 105 )
       ELSE
          WRITE ( 18, 106 )
       ENDIF

!
!--    Empty lines in order to create a gap before the results of possible
!--    model continuation runs
       WRITE ( 18, 107 )

!
!--    Unit 18 is not needed anymore
       CALL close_file( 18 )

    ENDIF


100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
            &') tasks *',I5,' threads):')

101 FORMAT (/'special measures:'/ &
            &'-----------------------------------------------------------', &
            &'--------------------')

102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
103 FORMAT (/'Barriers are set in front of collective operations')
104 FORMAT (/'No barriers are set in front of collective operations')
105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRECV')
106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
107 FORMAT (//)
108 FORMAT ('Accelerator boards per node: ',I2)
109 FORMAT ('Accelerator boards: ',I2)
110 FORMAT ('----------------------------------------------------------',   &
            &'------------'//&
            &'place:                        mean        counts      min  ', &
            &'     max       rms'/ &
            &'                           sec.      %                sec. ', &
            &'     sec.      sec.'/  &
            &'-----------------------------------------------------------', &
            &'-------------------')

 END SUBROUTINE cpu_statistics