source: palm/trunk/SOURCE/cpu_statistics.f90 @ 729

Last change on this file since 729 was 683, checked in by raasch, 14 years ago

New:
---

optional exchange of ghost points in synchronous mode via MPI_SENDRECV,
steered by d3par parameter synchronous_exchange
(cpu_statistics, exchange_horiz, modules, parin)

openMP-parallelization of pressure solver (fft-method) for 2d-domain-decomposition
(poisfft, transpose)

Changed:


Errors:


mpt bugfix for netCDF4 usage (mrun)

  • Property svn:keywords set to Id
File size: 9.3 KB
Line 
SUBROUTINE cpu_statistics

!------------------------------------------------------------------------------!
! Current revisions:
! -----------------
! output of handling of ghostpoint exchange
! Bugfix: a dedicated buffer (pe_tmp) is used for MPI_RECV/MPI_SEND. Formerly
! pe_max served as receive buffer on PE0, which destroyed its initial value
! (the time measured on PE0), so that the reported maximum ignored PE0.
!
! Former revisions:
! -----------------
! $Id: cpu_statistics.f90 683 2011-02-09 14:25:15Z heinze $
!
! 622 2010-12-10 08:08:13Z raasch
! output of handling of collective operations
!
! 222 2009-01-12 16:04:16Z letzel
! Bugfix for nonparallel execution
!
! 197 2008-09-16 15:29:03Z raasch
! Format adjustments in order to allow CPU# > 999,
! data are collected from PE0 in an ordered sequence which seems to avoid
! hanging of processes on SGI-ICE
!
! 82 2007-04-16 15:40:52Z raasch
! Preprocessor directives for old systems removed
!
! RCS Log replaced by Id keyword, revision history cleaned up
!
! Revision 1.13  2006/04/26 12:10:51  raasch
! Output of number of threads per task, max = min in case of 1 PE
!
! Revision 1.1  1997/07/24 11:11:11  raasch
! Initial revision
!
!
! Description:
! ------------
! Analysis and output of the cpu-times measured. All PE results are collected
! on PE0 in order to calculate the mean cpu-time over all PEs and other
! statistics. The output is sorted according to the amount of cpu-time consumed
! and output on PE0.
!
! The routine has no arguments. It works on the arrays log_point and
! log_point_s, which it modifies in place (on PE0 the component sum is
! replaced by the mean over all PEs in the parallel case), and it writes the
! result to unit 18.
! NOTE(review): log_point, log_point_s, myid, numprocs, comm2d, status, ierr,
! pdims, threads_per_task, run_description_header, collective_wait and
! synchronous_exchange are not declared here; presumably they are provided by
! the modules used below - confirm.
!------------------------------------------------------------------------------!

    USE cpulog
    USE pegrid
    USE control_parameters

    IMPLICIT NONE

!
!-- i, iii: loop indices / index of current maximum, ii: result of MAXLOC,
!-- sender: rank of the PE from which data have been received
    INTEGER    ::  i, ii(1), iii, sender
!
!-- Conversion factor applied to the measured times (always 1.0 here)
    REAL, SAVE ::  norm = 1.0
!
!-- pe_min/pe_max/pe_rms: statistics over all PEs (PE0 only),
!-- pe_tmp: communication buffer (parallel case only),
!-- sort_key: copy of the times used to steer the sorted output (PE0 only)
    REAL, DIMENSION(:),   ALLOCATABLE ::  pe_max, pe_min, pe_rms, pe_tmp, &
                                          sort_key
!
!-- Times of all log points (first index) on all PEs (second index)
    REAL, DIMENSION(:,:), ALLOCATABLE ::  pe_log_points


!
!-- Compute cpu-times in seconds
    log_point%mtime  = log_point%mtime  / norm
    log_point%sum    = log_point%sum    / norm
    log_point%vector = log_point%vector / norm
    WHERE ( log_point%counts /= 0 )
       log_point%mean = log_point%sum / log_point%counts
    END WHERE


!
!-- Collect cpu-times from all PEs and calculate statistics
    IF ( myid == 0 )  THEN
!
!--    Allocate and initialize temporary arrays needed for statistics
       ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), &
                 pe_rms( SIZE( log_point ) ),                              &
                 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) )
       pe_min = log_point%sum
       pe_max = log_point%sum    ! need to be set in case of 1 PE
       pe_rms = 0.0

#if defined( __parallel )
!
!--    Receive data from all PEs in ascending order of their rank. A separate
!--    buffer is used so that pe_min/pe_max keep the values of PE0.
       ALLOCATE( pe_tmp( SIZE( log_point ) ) )
       DO  i = 1, numprocs-1
          CALL MPI_RECV( pe_tmp(1), SIZE( log_point ), MPI_REAL, &
                         i, i, comm2d, status, ierr )
          sender = status(MPI_SOURCE)
          pe_log_points(:,sender) = pe_tmp
       ENDDO
       pe_log_points(:,0) = log_point%sum   ! Results from PE0
!
!--    Calculate mean of all PEs, store it on log_point%sum
!--    and find minimum and maximum
       DO  iii = 1, SIZE( log_point )
          DO  i = 1, numprocs-1
             log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i)
             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
          ENDDO
          log_point(iii)%sum = log_point(iii)%sum / numprocs
!
!--       Calculate rms
          DO  i = 0, numprocs-1
             pe_rms(iii) = pe_rms(iii) + ( &
                                 pe_log_points(iii,i) - log_point(iii)%sum &
                                         )**2
          ENDDO
          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
       ENDDO
    ELSE
!
!--    Send data to PE0 (pe_tmp is used as temporary storage to send
!--    the data in order to avoid sending the data type log). The own rank
!--    is used as message tag, matching the ordered receive on PE0.
       ALLOCATE( pe_tmp( SIZE( log_point ) ) )
       pe_tmp = log_point%sum
       CALL MPI_SEND( pe_tmp(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, &
                      ierr )
#endif

    ENDIF

!
!-- Write cpu-times
    IF ( myid == 0 )  THEN
!
!--    Copy the times to the sort key. Log points which have never been
!--    counted are marked with -1.0 and are thereby excluded from the output.
       ALLOCATE( sort_key( SIZE( log_point ) ) )
       WHERE ( log_point%counts /= 0 )
          sort_key = log_point%sum
       ELSEWHERE
          sort_key = -1.0
       ENDWHERE

!
!--    Write cpu-times sorted by size
       CALL check_open( 18 )
#if defined( __parallel )
       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
                          numprocs * threads_per_task, pdims(1), pdims(2), &
                          threads_per_task
#else
       WRITE ( 18, 100 )  TRIM( run_description_header ),        &
                          numprocs * threads_per_task, 1, 1, &
                          threads_per_task
#endif
!
!--    Repeatedly pick the largest remaining entry, write it and mark it as
!--    done (-1.0). The loop ends as soon as only marked entries are left.
!--    The percentage is given relative to the first log point.
       DO
          ii = MAXLOC( sort_key )
          i = ii(1)
          IF ( sort_key(i) /= -1.0 )  THEN
             WRITE ( 18, 102 ) &
                log_point(i)%place, log_point(i)%sum,                &
                log_point(i)%sum / log_point(1)%sum * 100.0,         &
                log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
             sort_key(i) = -1.0
          ELSE
             EXIT
          ENDIF
       ENDDO
    ENDIF


!
!-- The same procedure again for the individual measurements.
!-- NOTE(review): the temporary arrays allocated above with SIZE( log_point )
!-- are re-used for log_point_s, and SIZE( log_point ) is also used as message
!-- length and loop bound below, i.e. both arrays are assumed to have the same
!-- size - confirm against the declaration in module cpulog.
!
!-- Compute cpu-times in seconds
    log_point_s%mtime  = log_point_s%mtime  / norm
    log_point_s%sum    = log_point_s%sum    / norm
    log_point_s%vector = log_point_s%vector / norm
    WHERE ( log_point_s%counts /= 0 )
       log_point_s%mean = log_point_s%sum / log_point_s%counts
    END WHERE

!
!-- Collect cpu-times from all PEs and calculate statistics
#if defined( __parallel )
!
!-- Set barrier in order to avoid that PE0 receives log_point_s-data
!-- while still busy with receiving log_point-data (see above)
    CALL MPI_BARRIER( comm2d, ierr )
#endif
    IF ( myid == 0 )  THEN
!
!--    Initialize temporary arrays needed for statistics
       pe_min = log_point_s%sum
       pe_max = log_point_s%sum    ! need to be set in case of 1 PE
       pe_rms = 0.0

#if defined( __parallel )
!
!--    Receive data from all PEs (in arbitrary order, the sender is taken
!--    from the status). Again the separate buffer pe_tmp is used so that
!--    pe_min/pe_max keep the values of PE0.
       DO  i = 1, numprocs-1
          CALL MPI_RECV( pe_tmp(1), SIZE( log_point ), MPI_REAL, &
                         MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr )
          sender = status(MPI_SOURCE)
          pe_log_points(:,sender) = pe_tmp
       ENDDO
       pe_log_points(:,0) = log_point_s%sum   ! Results from PE0
!
!--    Calculate mean of all PEs, store it on log_point_s%sum
!--    and find minimum and maximum
       DO  iii = 1, SIZE( log_point )
          DO  i = 1, numprocs-1
             log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i)
             pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) )
             pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) )
          ENDDO
          log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs
!
!--       Calculate rms
          DO  i = 0, numprocs-1
             pe_rms(iii) = pe_rms(iii) + ( &
                                 pe_log_points(iii,i) - log_point_s(iii)%sum &
                                         )**2
          ENDDO
          pe_rms(iii) = SQRT( pe_rms(iii) / numprocs )
       ENDDO
    ELSE
!
!--    Send data to PE0 (pe_tmp is used as temporary storage to send
!--    the data in order to avoid sending the data type log)
       pe_tmp = log_point_s%sum
       CALL MPI_SEND( pe_tmp(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, &
                      ierr )
#endif

    ENDIF

!
!-- Write cpu-times
    IF ( myid == 0 )  THEN
!
!--    Copy the times to the sort key, never counted log points are marked
!--    with -1.0 (see above)
       WHERE ( log_point_s%counts /= 0 )
          sort_key = log_point_s%sum
       ELSEWHERE
          sort_key = -1.0
       ENDWHERE

!
!--    Write cpu-times sorted by size. The percentage is again given relative
!--    to the first entry of log_point (not of log_point_s).
       WRITE ( 18, 101 )
       DO
          ii = MAXLOC( sort_key )
          i = ii(1)
          IF ( sort_key(i) /= -1.0 )  THEN
             WRITE ( 18, 102 ) &
                log_point_s(i)%place, log_point_s(i)%sum, &
                log_point_s(i)%sum / log_point(1)%sum * 100.0, &
                log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i)
             sort_key(i) = -1.0
          ELSE
             EXIT
          ENDIF
       ENDDO

!
!--    Output of handling of MPI operations
       IF ( collective_wait )  THEN
          WRITE ( 18, 103 )
       ELSE
          WRITE ( 18, 104 )
       ENDIF
       IF ( synchronous_exchange )  THEN
          WRITE ( 18, 105 )
       ELSE
          WRITE ( 18, 106 )
       ENDIF

!
!--    Empty lines in order to create a gap to the results of the model
!--    continuation runs
       WRITE ( 18, 107 )

!
!--    Unit 18 is not needed anymore
       CALL close_file( 18 )

    ENDIF


100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', &
            &') tasks *',I5,' threads):'/ &
             &'----------------------------------------------------------', &
             &'------------'//&
            &'place:                        mean        counts      min  ', &
             &'     max       rms'/ &
            &'                           sec.      %                sec. ', &
             &'     sec.      sec.'/  &
            &'-----------------------------------------------------------', &
             &'-------------------')

101 FORMAT (/'special measures:'/ &
            &'-----------------------------------------------------------', &
            &'--------------------')

102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3))
103 FORMAT (/'Barriers are set in front of collective operations')
104 FORMAT (/'No barriers are set in front of collective operations')
105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV')
106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
107 FORMAT (//)

END SUBROUTINE cpu_statistics
300
Note: See TracBrowser for help on using the repository browser.