source: palm/trunk/SOURCE/cpu_statistics.f90 @ 1119

Last change on this file since 1119 was 1112, checked in by raasch, 12 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 11.6 KB
RevLine 
[1]1 SUBROUTINE cpu_statistics
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1]21! -----------------
[1112]22!
[1]23!
24! Former revisions:
25! -----------------
[3]26! $Id: cpu_statistics.f90 1112 2013-03-09 00:34:37Z raasch $
[83]27!
[1112]28! 1111 2013-03-08 23:54:10Z raasch
29! output of grid point numbers and average CPU time per grid point and timestep
30!
[1093]31! 1092 2013-02-02 11:24:22Z raasch
32! unused variables removed
33!
[1037]34! 1036 2012-10-22 13:43:42Z raasch
35! code put under GPL (PALM 3.9)
36!
[1017]37! 1015 2012-09-27 09:23:24Z raasch
38! output of accelerator board information
39!
[1015]40! 683 2011-02-09 14:25:15Z raasch
41! output of handling of ghostpoint exchange
42!
[623]43! 622 2010-12-10 08:08:13Z raasch
44! output of handling of collective operations
45!
[226]46! 222 2009-01-12 16:04:16Z letzel
47! Bugfix for nonparallel execution
48!
[198]49! 197 2008-09-16 15:29:03Z raasch
50! Format adjustments in order to allow CPU# > 999,
51! data are collected from PE0 in an ordered sequence which seems to avoid
52! hanging of processes on SGI-ICE
53!
[83]54! 82 2007-04-16 15:40:52Z raasch
55! Preprocessor directives for old systems removed
56!
[3]57! RCS Log replace by Id keyword, revision history cleaned up
58!
[1]59! Revision 1.13  2006/04/26 12:10:51  raasch
60! Output of number of threads per task, max = min in case of 1 PE
61!
62! Revision 1.1  1997/07/24 11:11:11  raasch
63! Initial revision
64!
65!
66! Description:
67! ------------
68! Analysis and output of the cpu-times measured. All PE results are collected
69! on PE0 in order to calculate the mean cpu-time over all PEs and other
70! statistics. The output is sorted according to the amount of cpu-time consumed
71! and output on PE0.
[3]72!------------------------------------------------------------------------------!
[1]73
[1111]74    USE control_parameters
[1]75    USE cpulog
[1111]76    USE indices,  ONLY: nx, ny, nz
[1]77    USE pegrid
78
79    IMPLICIT NONE
80
[1092]81    INTEGER    ::  i, ii(1), iii, sender
[1111]82    REAL       ::  average_cputime
[1]83    REAL, SAVE ::  norm = 1.0
84    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
85    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points
86
87
88!
89!-- Compute cpu-times in seconds
90    log_point%mtime  = log_point%mtime  / norm
91    log_point%sum    = log_point%sum    / norm
92    log_point%vector = log_point%vector / norm
93    WHERE ( log_point%counts /= 0 )
94       log_point%mean = log_point%sum / log_point%counts
95    END WHERE
96
97
98!
99!-- Collect cpu-times from all PEs and calculate statistics
100    IF ( myid == 0 )  THEN
101!
102!--    Allocate and initialize temporary arrays needed for statistics
103       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
104                 pe_rms( SIZE( log_point ) ),                              &
105                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
106       pe_min = log_point%sum
107       pe_max = log_point%sum    ! need to be set in case of 1 PE
108       pe_rms = 0.0
109
110#if defined( __parallel )
111!
112!--    Receive data from all PEs
113       DO  i = 1, numprocs-1
114          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
[197]115                         i, i, comm2d, status, ierr )
[1]116          sender = status(MPI_SOURCE)
117          pe_log_points(:,sender) = pe_max
118       ENDDO
119       pe_log_points(:,0) = log_point%sum   ! Results from PE0
120!
121!--    Calculate mean of all PEs, store it on log_point%sum
122!--    and find minimum and maximum
123       DO  iii = 1, SIZE( log_point )
124          DO  i = 1, numprocs-1
125             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
126             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
127             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
128          ENDDO
129          log_point(iii)%sum = log_point(iii)%sum / numprocs
130!
131!--       Calculate rms
132          DO  i = 0, numprocs-1
133             pe_rms(iii) = pe_rms(iii) + ( &
134                                 pe_log_points(iii,i) - log_point(iii)%sum &
135                                         )**2
136          ENDDO
137          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
138       ENDDO
139    ELSE
140!
141!--    Send data to PE0 (pe_max is used as temporary storage to send
142!--    the data in order to avoid sending the data type log)
143       ALLOCATE( pe_max( SIZE( log_point ) ) )
144       pe_max = log_point%sum
[197]145       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
[1]146                      ierr )
147#endif
148
149    ENDIF
150
151!
152!-- Write cpu-times
153    IF ( myid == 0 )  THEN
154!
155!--    Re-store sums
156       ALLOCATE( sum( SIZE( log_point ) ) )
157       WHERE ( log_point%counts /= 0 )
158          sum = log_point%sum
159       ELSEWHERE
160          sum = -1.0
161       ENDWHERE
162
163!
[1111]164!--    Get total time in order to calculate CPU-time per gridpoint and timestep
165       IF ( nr_timesteps_this_run /= 0 )  THEN
166          average_cputime = log_point(1)%sum / REAL( (nx+1) * (ny+1) * nz ) / &
167                            REAL( nr_timesteps_this_run ) * 1E6  ! in micro-sec
168       ELSE
169          average_cputime = -1.0
170       ENDIF
171
172!
[1]173!--    Write cpu-times sorted by size
174       CALL check_open( 18 )
[222]175#if defined( __parallel )
[1111]176       WRITE ( 18, 100 )  TRIM( run_description_header ),                          &
177                          numprocs * threads_per_task, pdims(1), pdims(2),         &
178                          threads_per_task, nx+1, ny+1, nz, nr_timesteps_this_run, &
179                          average_cputime
180                         
[1015]181       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 108 )  num_acc_per_node
182       WRITE ( 18, 110 )
[222]183#else
[1111]184       WRITE ( 18, 100 )  TRIM( run_description_header ),                          &
185                          numprocs * threads_per_task, 1, 1,                       &
186                          threads_per_task, nx+1, ny+1, nz, nr_timesteps_this_run, &
187                          average_cputime
188
[1015]189       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 109 )  num_acc_per_node
190       WRITE ( 18, 110 )
[222]191#endif
[1]192       DO
193          ii = MAXLOC( sum )
194          i = ii(1)
195          IF ( sum(i) /= -1.0 )  THEN
196             WRITE ( 18, 102 ) &
197                log_point(i)%place, log_point(i)%sum,                &
198                log_point(i)%sum / log_point(1)%sum * 100.0,         &
199                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
200             sum(i) = -1.0
201          ELSE
202             EXIT
203          ENDIF
204       ENDDO
205    ENDIF
206
207
208!
209!-- The same procedure again for the individual measurements.
210!
211!-- Compute cpu-times in seconds
212    log_point_s%mtime  = log_point_s%mtime  / norm
213    log_point_s%sum    = log_point_s%sum    / norm
214    log_point_s%vector = log_point_s%vector / norm
215    WHERE ( log_point_s%counts /= 0 )
216       log_point_s%mean = log_point_s%sum / log_point_s%counts
217    END WHERE
218
219!
220!-- Collect cpu-times from all PEs and calculate statistics
221#if defined( __parallel )
222!
223!-- Set barrier in order to avoid that PE0 receives log_point_s-data
224!-- while still busy with receiving log_point-data (see above)
225    CALL MPI_BARRIER( comm2d, ierr )   
226#endif
227    IF ( myid == 0 )  THEN
228!
229!--    Initialize temporary arrays needed for statistics
230       pe_min = log_point_s%sum
231       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
232       pe_rms = 0.0
233
234#if defined( __parallel )
235!
236!--    Receive data from all PEs
237       DO  i = 1, numprocs-1
238          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
239                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
240          sender = status(MPI_SOURCE)
241          pe_log_points(:,sender) = pe_max
242       ENDDO
243       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
244!
245!--    Calculate mean of all PEs, store it on log_point_s%sum
246!--    and find minimum and maximum
247       DO  iii = 1, SIZE( log_point )
248          DO  i = 1, numprocs-1
249             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
250             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
251             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
252          ENDDO
253          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
254!
255!--       Calculate rms
256          DO  i = 0, numprocs-1
257             pe_rms(iii) = pe_rms(iii) + ( &
258                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
259                                         )**2
260          ENDDO
261          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
262       ENDDO
263    ELSE
264!
265!--    Send data to PE0 (pe_max is used as temporary storage to send
266!--    the data in order to avoid sending the data type log)
267       pe_max = log_point_s%sum
268       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
269                      ierr )
270#endif
271
272    ENDIF
273
274!
275!-- Write cpu-times
276    IF ( myid == 0 )  THEN
277!
278!--    Re-store sums
279       WHERE ( log_point_s%counts /= 0 )
280          sum = log_point_s%sum
281       ELSEWHERE
282          sum = -1.0
283       ENDWHERE
284
285!
286!--    Write cpu-times sorted by size
287       WRITE ( 18, 101 )
288       DO
289          ii = MAXLOC( sum )
290          i = ii(1)
291          IF ( sum(i) /= -1.0 )  THEN
292             WRITE ( 18, 102 ) &
293                log_point_s(i)%place, log_point_s(i)%sum, &
294                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
295                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
296             sum(i) = -1.0
297          ELSE
298             EXIT
299          ENDIF
300       ENDDO
301
302!
[683]303!--    Output of handling of MPI operations
[622]304       IF ( collective_wait )  THEN
305          WRITE ( 18, 103 )
306       ELSE
307          WRITE ( 18, 104 )
308       ENDIF
[683]309       IF ( synchronous_exchange )  THEN
310          WRITE ( 18, 105 )
311       ELSE
312          WRITE ( 18, 106 )
313       ENDIF
[622]314
315!
[1]316!--    Empty lines in order to create a gap to the results of the model
317!--    continuation runs
[683]318       WRITE ( 18, 107 )
[1]319
320!
321!--    Unit 18 is not needed anymore
322       CALL close_file( 18 )
323
324    ENDIF
325
326
[181]327100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
[1111]328            &') tasks *',I5,' threads):'//                                  &
329            'gridpoints (x/y/z): ',20X,I5,' * ',I5,' * ',I5/                &
330            'nr of timesteps: ',22X,I6/                                     &
331            'cpu time per grid point and timestep: ',5X,F8.5,' * 10**-6 s')
[1]332
333101 FORMAT (/'special measures:'/ &
334            &'-----------------------------------------------------------', &
335            &'--------------------')
336
337102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
[622]338103 FORMAT (/'Barriers are set in front of collective operations')
339104 FORMAT (/'No barriers are set in front of collective operations')
[683]340105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV')
341106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
342107 FORMAT (//)
[1111]343108 FORMAT ('Accelerator boards per node: ',14X,I2)
344109 FORMAT ('Accelerator boards: ',23X,I2)
[1015]345110 FORMAT ('----------------------------------------------------------',   &
346            &'------------'//&
347            &'place:                        mean        counts      min  ', &
348            &'     max       rms'/ &
349            &'                           sec.      %                sec. ', &
350            &'     sec.      sec.'/  &
351            &'-----------------------------------------------------------', &
352            &'-------------------')
[1]353
354 END SUBROUTINE cpu_statistics
355
Note: See TracBrowser for help on using the repository browser.