source: palm/trunk/SOURCE/cpu_statistics.f90 @ 1105

Last change on this file since 1105 was 1093, checked in by raasch, 12 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 10.6 KB
RevLine 
[1]1 SUBROUTINE cpu_statistics
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1]21! -----------------
[1093]22!
[1]23!
24! Former revisions:
25! -----------------
[3]26! $Id: cpu_statistics.f90 1093 2013-02-02 12:58:49Z maronga $
[83]27!
[1093]28! 1092 2013-02-02 11:24:22Z raasch
29! unused variables removed
30!
[1037]31! 1036 2012-10-22 13:43:42Z raasch
32! code put under GPL (PALM 3.9)
33!
[1017]34! 1015 2012-09-27 09:23:24Z raasch
35! output of accelerator board information
36!
[1015]37! 683 2011-02-09 14:25:15Z raasch
38! output of handling of ghostpoint exchange
39!
[623]40! 622 2010-12-10 08:08:13Z raasch
41! output of handling of collective operations
42!
[226]43! 222 2009-01-12 16:04:16Z letzel
44! Bugfix for nonparallel execution
45!
[198]46! 197 2008-09-16 15:29:03Z raasch
47! Format adjustments in order to allow CPU# > 999,
48! data are collected from PE0 in an ordered sequence which seems to avoid
49! hanging of processes on SGI-ICE
50!
[83]51! 82 2007-04-16 15:40:52Z raasch
52! Preprocessor directives for old systems removed
53!
[3]54! RCS Log replace by Id keyword, revision history cleaned up
55!
[1]56! Revision 1.13  2006/04/26 12:10:51  raasch
57! Output of number of threads per task, max = min in case of 1 PE
58!
59! Revision 1.1  1997/07/24 11:11:11  raasch
60! Initial revision
61!
62!
63! Description:
64! ------------
65! Analysis and output of the cpu-times measured. All PE results are collected
66! on PE0 in order to calculate the mean cpu-time over all PEs and other
67! statistics. The output is sorted according to the amount of cpu-time consumed
68! and output on PE0.
[3]69!------------------------------------------------------------------------------!
[1]70
71    USE cpulog
72    USE pegrid
73    USE control_parameters
74
75    IMPLICIT NONE
76
[1092]77    INTEGER    ::  i, ii(1), iii, sender
[1]78    REAL, SAVE ::  norm = 1.0
79    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, sum
80    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points
81
82
83!
84!-- Compute cpu-times in seconds
85    log_point%mtime  = log_point%mtime  / norm
86    log_point%sum    = log_point%sum    / norm
87    log_point%vector = log_point%vector / norm
88    WHERE ( log_point%counts /= 0 )
89       log_point%mean = log_point%sum / log_point%counts
90    END WHERE
91
92
93!
94!-- Collect cpu-times from all PEs and calculate statistics
95    IF ( myid == 0 )  THEN
96!
97!--    Allocate and initialize temporary arrays needed for statistics
98       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
99                 pe_rms( SIZE( log_point ) ),                              &
100                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
101       pe_min = log_point%sum
102       pe_max = log_point%sum    ! need to be set in case of 1 PE
103       pe_rms = 0.0
104
105#if defined( __parallel )
106!
107!--    Receive data from all PEs
108       DO  i = 1, numprocs-1
109          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
[197]110                         i, i, comm2d, status, ierr )
[1]111          sender = status(MPI_SOURCE)
112          pe_log_points(:,sender) = pe_max
113       ENDDO
114       pe_log_points(:,0) = log_point%sum   ! Results from PE0
115!
116!--    Calculate mean of all PEs, store it on log_point%sum
117!--    and find minimum and maximum
118       DO  iii = 1, SIZE( log_point )
119          DO  i = 1, numprocs-1
120             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
121             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
122             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
123          ENDDO
124          log_point(iii)%sum = log_point(iii)%sum / numprocs
125!
126!--       Calculate rms
127          DO  i = 0, numprocs-1
128             pe_rms(iii) = pe_rms(iii) + ( &
129                                 pe_log_points(iii,i) - log_point(iii)%sum &
130                                         )**2
131          ENDDO
132          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
133       ENDDO
134    ELSE
135!
136!--    Send data to PE0 (pe_max is used as temporary storage to send
137!--    the data in order to avoid sending the data type log)
138       ALLOCATE( pe_max( SIZE( log_point ) ) )
139       pe_max = log_point%sum
[197]140       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
[1]141                      ierr )
142#endif
143
144    ENDIF
145
146!
147!-- Write cpu-times
148    IF ( myid == 0 )  THEN
149!
150!--    Re-store sums
151       ALLOCATE( sum( SIZE( log_point ) ) )
152       WHERE ( log_point%counts /= 0 )
153          sum = log_point%sum
154       ELSEWHERE
155          sum = -1.0
156       ENDWHERE
157
158!
159!--    Write cpu-times sorted by size
160       CALL check_open( 18 )
[222]161#if defined( __parallel )
[1]162       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
[181]163                          numprocs * threads_per_task, pdims(1), pdims(2), &
[1]164                          threads_per_task
[1015]165       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 108 )  num_acc_per_node
166       WRITE ( 18, 110 )
[222]167#else
168       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
169                          numprocs * threads_per_task, 1, 1, &
170                          threads_per_task
[1015]171       IF ( num_acc_per_node /= 0 )  WRITE ( 18, 109 )  num_acc_per_node
172       WRITE ( 18, 110 )
[222]173#endif
[1]174       DO
175          ii = MAXLOC( sum )
176          i = ii(1)
177          IF ( sum(i) /= -1.0 )  THEN
178             WRITE ( 18, 102 ) &
179                log_point(i)%place, log_point(i)%sum,                &
180                log_point(i)%sum / log_point(1)%sum * 100.0,         &
181                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
182             sum(i) = -1.0
183          ELSE
184             EXIT
185          ENDIF
186       ENDDO
187    ENDIF
188
189
190!
191!-- The same procedure again for the individual measurements.
192!
193!-- Compute cpu-times in seconds
194    log_point_s%mtime  = log_point_s%mtime  / norm
195    log_point_s%sum    = log_point_s%sum    / norm
196    log_point_s%vector = log_point_s%vector / norm
197    WHERE ( log_point_s%counts /= 0 )
198       log_point_s%mean = log_point_s%sum / log_point_s%counts
199    END WHERE
200
201!
202!-- Collect cpu-times from all PEs and calculate statistics
203#if defined( __parallel )
204!
205!-- Set barrier in order to avoid that PE0 receives log_point_s-data
206!-- while still busy with receiving log_point-data (see above)
207    CALL MPI_BARRIER( comm2d, ierr )   
208#endif
209    IF ( myid == 0 )  THEN
210!
211!--    Initialize temporary arrays needed for statistics
212       pe_min = log_point_s%sum
213       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
214       pe_rms = 0.0
215
216#if defined( __parallel )
217!
218!--    Receive data from all PEs
219       DO  i = 1, numprocs-1
220          CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, &
221                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
222          sender = status(MPI_SOURCE)
223          pe_log_points(:,sender) = pe_max
224       ENDDO
225       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
226!
227!--    Calculate mean of all PEs, store it on log_point_s%sum
228!--    and find minimum and maximum
229       DO  iii = 1, SIZE( log_point )
230          DO  i = 1, numprocs-1
231             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
232             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
233             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
234          ENDDO
235          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
236!
237!--       Calculate rms
238          DO  i = 0, numprocs-1
239             pe_rms(iii) = pe_rms(iii) + ( &
240                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
241                                         )**2
242          ENDDO
243          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
244       ENDDO
245    ELSE
246!
247!--    Send data to PE0 (pe_max is used as temporary storage to send
248!--    the data in order to avoid sending the data type log)
249       pe_max = log_point_s%sum
250       CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
251                      ierr )
252#endif
253
254    ENDIF
255
256!
257!-- Write cpu-times
258    IF ( myid == 0 )  THEN
259!
260!--    Re-store sums
261       WHERE ( log_point_s%counts /= 0 )
262          sum = log_point_s%sum
263       ELSEWHERE
264          sum = -1.0
265       ENDWHERE
266
267!
268!--    Write cpu-times sorted by size
269       WRITE ( 18, 101 )
270       DO
271          ii = MAXLOC( sum )
272          i = ii(1)
273          IF ( sum(i) /= -1.0 )  THEN
274             WRITE ( 18, 102 ) &
275                log_point_s(i)%place, log_point_s(i)%sum, &
276                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
277                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
278             sum(i) = -1.0
279          ELSE
280             EXIT
281          ENDIF
282       ENDDO
283
284!
[683]285!--    Output of handling of MPI operations
[622]286       IF ( collective_wait )  THEN
287          WRITE ( 18, 103 )
288       ELSE
289          WRITE ( 18, 104 )
290       ENDIF
[683]291       IF ( synchronous_exchange )  THEN
292          WRITE ( 18, 105 )
293       ELSE
294          WRITE ( 18, 106 )
295       ENDIF
[622]296
297!
[1]298!--    Empty lines in order to create a gap to the results of the model
299!--    continuation runs
[683]300       WRITE ( 18, 107 )
[1]301
302!
303!--    Unit 18 is not needed anymore
304       CALL close_file( 18 )
305
306    ENDIF
307
308
[181]309100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
[1015]310            &') tasks *',I5,' threads):')
[1]311
312101 FORMAT (/'special measures:'/ &
313            &'-----------------------------------------------------------', &
314            &'--------------------')
315
316102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
[622]317103 FORMAT (/'Barriers are set in front of collective operations')
318104 FORMAT (/'No barriers are set in front of collective operations')
[683]319105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV')
320106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
321107 FORMAT (//)
[1015]322108 FORMAT ('Accelerator boards per node: ',I2)
323109 FORMAT ('Accelerator boards: ',I2)
324110 FORMAT ('----------------------------------------------------------',   &
325            &'------------'//&
326            &'place:                        mean        counts      min  ', &
327            &'     max       rms'/ &
328            &'                           sec.      %                sec. ', &
329            &'     sec.      sec.'/  &
330            &'-----------------------------------------------------------', &
331            &'-------------------')
[1]332
333 END SUBROUTINE cpu_statistics
334
Note: See TracBrowser for help on using the repository browser.