Changeset 1318 for palm/trunk/SOURCE/cpulog.f90
- Timestamp:
- Mar 17, 2014 1:35:16 PM (10 years ago)
- File:
-
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/cpulog.f90
r1313 r1318 1 SUBROUTINE cpu_log( log_event, place, modus, barrierwait )1 MODULE cpulog 2 2 3 3 !--------------------------------------------------------------------------------! … … 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! former files/routines cpu_log and cpu_statistics combined to one module, 23 ! which also includes the former data module cpulog from the modules-file 23 24 ! 24 25 ! Former revisions: … … 55 56 56 57 USE control_parameters 57 USE cpulog58 USE indices, ONLY: nx, ny, nz 58 59 USE pegrid 59 60 60 61 IMPLICIT NONE 61 62 62 CHARACTER (LEN=*) :: modus, place 63 CHARACTER (LEN=*), OPTIONAL :: barrierwait 64 LOGICAL, SAVE :: first = .TRUE. 65 REAL :: mtime = 0.0, mtimevec = 0.0 66 TYPE(logpoint) :: log_event 63 PRIVATE 64 PUBLIC cpu_log, cpu_log_barrierwait, cpu_log_nowait, cpu_statistics, & 65 initial_wallclock_time, log_point, log_point_s 66 67 INTERFACE cpu_log 68 MODULE PROCEDURE cpu_log 69 END INTERFACE cpu_log 70 71 INTERFACE cpu_statistics 72 MODULE PROCEDURE cpu_statistics 73 END INTERFACE cpu_statistics 74 75 INTEGER, PARAMETER :: cpu_log_continue = 0, cpu_log_pause = 1, & 76 cpu_log_start = 2, cpu_log_stop = 3 77 78 LOGICAL :: cpu_log_barrierwait = .FALSE. 79 LOGICAL, PARAMETER :: cpu_log_nowait = .FALSE. 80 81 REAL :: initial_wallclock_time 82 83 TYPE logpoint 84 REAL :: isum, ivect, mean, mtime, mtimevec, sum, vector 85 INTEGER :: counts 86 CHARACTER (LEN=20) :: place 87 END TYPE logpoint 88 89 TYPE(logpoint), DIMENSION(100) :: log_point = logpoint( 0.0, 0.0, 0.0, & 90 0.0, 0.0, 0.0, 0.0, 0, ' ' ), & 91 log_point_s = logpoint( 0.0, 0.0, 0.0, & 92 0.0, 0.0, 0.0, 0.0, 0, ' ' ) 93 94 SAVE 95 96 CONTAINS 97 98 SUBROUTINE cpu_log( log_event, place, modus, barrierwait ) 99 100 IMPLICIT NONE 101 102 CHARACTER (LEN=*) :: modus, place 103 LOGICAL :: wait_allowed 104 LOGICAL, OPTIONAL :: barrierwait 105 LOGICAL, SAVE :: first = .TRUE. 106 REAL :: mtime = 0.0, mtimevec = 0.0 107 TYPE(logpoint) :: log_event 67 108 68 109 #if defined( __lc ) || defined( __decalpha ) 69 INTEGER(8) :: count, count_rate110 INTEGER(8) :: count, count_rate 70 111 #elif defined( __nec ) 71 INTEGER :: count, count_rate112 INTEGER :: count, count_rate 72 113 #elif defined( __ibm ) 73 INTEGER(8) :: IRTC 74 #endif 75 76 77 ! 78 !-- Initialize and check, respectively, point of measurement 79 IF ( log_event%place == ' ' ) THEN 80 log_event%place = place 81 ELSEIF ( log_event%place /= place ) THEN 82 WRITE( message_string, * ) 'wrong argument & expected: ', & 83 TRIM(log_event%place), ' given: ', TRIM( place ) 84 CALL message( 'cpu_log', 'PA0174', 1, 2, 0, 6, 0 ) 85 ENDIF 86 87 ! 88 !-- Take current time 114 INTEGER(8) :: IRTC 115 #endif 116 117 118 ! 119 !-- Initialize and check, respectively, point of measurement 120 IF ( log_event%place == ' ' ) THEN 121 log_event%place = place 122 ELSEIF ( log_event%place /= place ) THEN 123 WRITE( message_string, * ) 'wrong argument & expected: ', & 124 TRIM(log_event%place), ' given: ', TRIM( place ) 125 CALL message( 'cpu_log', 'PA0174', 1, 2, 0, 6, 0 ) 126 ENDIF 127 128 ! 129 !-- Determine, if barriers are allowed to set 130 IF ( PRESENT( barrierwait ) ) THEN 131 wait_allowed = barrierwait 132 ELSE 133 wait_allowed = .TRUE. 134 ENDIF 135 136 ! 137 !-- MPI barrier, if requested, in order to avoid measuring wait times 138 !-- caused by MPI routines waiting for other MPI routines of other 139 !-- PEs that have not yet finished 140 #if defined( __parallel ) 141 IF ( cpu_log_barrierwait .AND. wait_allowed .AND. & 142 ( modus == 'start' .OR. modus == 'continue' ) ) THEN 143 CALL MPI_BARRIER( comm2d, ierr ) 144 ENDIF 145 #endif 146 147 ! 148 !-- Take current time 89 149 #if defined( __lc ) || defined( __decalpha ) || defined( __nec ) 90 CALL SYSTEM_CLOCK( count, count_rate )91 mtime = REAL( count ) / REAL( count_rate )150 CALL SYSTEM_CLOCK( count, count_rate ) 151 mtime = REAL( count ) / REAL( count_rate ) 92 152 #elif defined( __ibm ) 93 mtime = IRTC( ) * 1E-9153 mtime = IRTC( ) * 1E-9 94 154 #else 95 message_string = 'no time measurement defined on this host' 96 CALL message( 'cpu_log', 'PA0175', 1, 2, 0, 6, 0 ) 97 #endif 98 99 ! 100 !-- Start, stop or pause measurement 101 IF ( modus == 'start' .OR. modus == 'continue' ) THEN 102 log_event%mtime = mtime 103 log_event%mtimevec = mtimevec 104 ELSEIF ( modus == 'pause' ) THEN 105 IF ( ( mtime - log_event%mtime ) < 0.0 .AND. first ) THEN 106 WRITE( message_string, * ) 'negative time interval occured', & 107 ' &PE',myid,' L=PAUSE "',TRIM(log_event%place),'" new=', & 108 mtime,' last=',log_event%mtime 109 CALL message( 'cpu_log', 'PA0176', 0, 1, -1, 6, 0 ) 110 first = .FALSE. 111 ENDIF 112 log_event%isum = log_event%isum + mtime - log_event%mtime 113 log_event%ivect = log_event%ivect + mtimevec - log_event%mtimevec 114 ELSEIF ( modus == 'stop' ) THEN 115 IF ( ( mtime - log_event%mtime + log_event%isum ) < 0.0 .AND. & 116 first ) THEN 117 WRITE( message_string, * ) 'negative time interval occured', & 118 ' &PE',myid,' L=STOP "',TRIM(log_event%place),'" new=', & 119 mtime,' last=',log_event%mtime,' isum=',log_event%isum 120 CALL message( 'cpu_log', 'PA0177', 0, 1, -1, 6, 0 ) 121 first = .FALSE. 122 ENDIF 123 log_event%mtime = mtime - log_event%mtime + log_event%isum 124 log_event%mtimevec = mtimevec - log_event%mtimevec + log_event%ivect 125 log_event%sum = log_event%sum + log_event%mtime 126 IF ( log_event%sum < 0.0 .AND. first ) THEN 127 WRITE( message_string, * ) 'negative time interval occured', & 128 ' &PE',myid,' L=STOP "',TRIM(log_event%place),'" sum=', & 129 log_event%sum,' mtime=',log_event%mtime 130 CALL message( 'cpu_log', 'PA0178', 0, 1, -1, 6, 0 ) 131 first = .FALSE. 132 ENDIF 133 log_event%vector = log_event%vector + log_event%mtimevec 134 log_event%counts = log_event%counts + 1 135 log_event%isum = 0.0 136 log_event%ivect = 0.0 137 ELSE 138 message_string = 'unknown modus of time measurement: ' // TRIM( modus ) 139 CALL message( 'cpu_log', 'PA0179', 0, 1, -1, 6, 0 ) 140 ENDIF 141 142 143 END SUBROUTINE cpu_log 155 message_string = 'no time measurement defined on this host' 156 CALL message( 'cpu_log', 'PA0175', 1, 2, 0, 6, 0 ) 157 #endif 158 159 ! 160 !-- Start, stop or pause measurement 161 IF ( modus == 'start' .OR. modus == 'continue' ) THEN 162 log_event%mtime = mtime 163 log_event%mtimevec = mtimevec 164 ELSEIF ( modus == 'pause' ) THEN 165 IF ( ( mtime - log_event%mtime ) < 0.0 .AND. first ) THEN 166 WRITE( message_string, * ) 'negative time interval occured', & 167 ' &PE',myid,' L=PAUSE "',TRIM(log_event%place),'" new=', & 168 mtime,' last=',log_event%mtime 169 CALL message( 'cpu_log', 'PA0176', 0, 1, -1, 6, 0 ) 170 first = .FALSE. 171 ENDIF 172 log_event%isum = log_event%isum + mtime - log_event%mtime 173 log_event%ivect = log_event%ivect + mtimevec - log_event%mtimevec 174 ELSEIF ( modus == 'stop' ) THEN 175 IF ( ( mtime - log_event%mtime + log_event%isum ) < 0.0 .AND. & 176 first ) THEN 177 WRITE( message_string, * ) 'negative time interval occured', & 178 ' &PE',myid,' L=STOP "',TRIM(log_event%place),'" new=', & 179 mtime,' last=',log_event%mtime,' isum=',log_event%isum 180 CALL message( 'cpu_log', 'PA0177', 0, 1, -1, 6, 0 ) 181 first = .FALSE. 182 ENDIF 183 log_event%mtime = mtime - log_event%mtime + log_event%isum 184 log_event%mtimevec = mtimevec - log_event%mtimevec + log_event%ivect 185 log_event%sum = log_event%sum + log_event%mtime 186 IF ( log_event%sum < 0.0 .AND. first ) THEN 187 WRITE( message_string, * ) 'negative time interval occured', & 188 ' &PE',myid,' L=STOP "',TRIM(log_event%place),'" sum=', & 189 log_event%sum,' mtime=',log_event%mtime 190 CALL message( 'cpu_log', 'PA0178', 0, 1, -1, 6, 0 ) 191 first = .FALSE. 192 ENDIF 193 log_event%vector = log_event%vector + log_event%mtimevec 194 log_event%counts = log_event%counts + 1 195 log_event%isum = 0.0 196 log_event%ivect = 0.0 197 ELSE 198 message_string = 'unknown modus of time measurement: ' // TRIM( modus ) 199 CALL message( 'cpu_log', 'PA0179', 0, 1, -1, 6, 0 ) 200 ENDIF 201 202 END SUBROUTINE cpu_log 203 204 205 SUBROUTINE cpu_statistics 206 !------------------------------------------------------------------------------! 207 ! Description: 208 ! ------------ 209 ! Analysis and output of the cpu-times measured. All PE results are collected 210 ! on PE0 in order to calculate the mean cpu-time over all PEs and other 211 ! statistics. The output is sorted according to the amount of cpu-time consumed 212 ! and output on PE0. 213 !------------------------------------------------------------------------------! 214 215 IMPLICIT NONE 216 217 INTEGER :: i, ii(1), iii, sender 218 REAL :: average_cputime 219 REAL, SAVE :: norm = 1.0 220 REAL, DIMENSION(:), ALLOCATABLE :: pe_max, pe_min, pe_rms, sum 221 REAL, DIMENSION(:,:), ALLOCATABLE :: pe_log_points 222 223 224 ! 225 !-- Compute cpu-times in seconds 226 log_point%mtime = log_point%mtime / norm 227 log_point%sum = log_point%sum / norm 228 log_point%vector = log_point%vector / norm 229 WHERE ( log_point%counts /= 0 ) 230 log_point%mean = log_point%sum / log_point%counts 231 END WHERE 232 233 234 ! 235 !-- Collect cpu-times from all PEs and calculate statistics 236 IF ( myid == 0 ) THEN 237 ! 238 !-- Allocate and initialize temporary arrays needed for statistics 239 ALLOCATE( pe_max( SIZE( log_point ) ), pe_min( SIZE( log_point ) ), & 240 pe_rms( SIZE( log_point ) ), & 241 pe_log_points( SIZE( log_point ), 0:numprocs-1 ) ) 242 pe_min = log_point%sum 243 pe_max = log_point%sum ! need to be set in case of 1 PE 244 pe_rms = 0.0 245 246 #if defined( __parallel ) 247 ! 248 !-- Receive data from all PEs 249 DO i = 1, numprocs-1 250 CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, & 251 i, i, comm2d, status, ierr ) 252 sender = status(MPI_SOURCE) 253 pe_log_points(:,sender) = pe_max 254 ENDDO 255 pe_log_points(:,0) = log_point%sum ! Results from PE0 256 ! 257 !-- Calculate mean of all PEs, store it on log_point%sum 258 !-- and find minimum and maximum 259 DO iii = 1, SIZE( log_point ) 260 DO i = 1, numprocs-1 261 log_point(iii)%sum = log_point(iii)%sum + pe_log_points(iii,i) 262 pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) ) 263 pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) ) 264 ENDDO 265 log_point(iii)%sum = log_point(iii)%sum / numprocs 266 ! 267 !-- Calculate rms 268 DO i = 0, numprocs-1 269 pe_rms(iii) = pe_rms(iii) + ( & 270 pe_log_points(iii,i) - log_point(iii)%sum & 271 )**2 272 ENDDO 273 pe_rms(iii) = SQRT( pe_rms(iii) / numprocs ) 274 ENDDO 275 ELSE 276 ! 277 !-- Send data to PE0 (pe_max is used as temporary storage to send 278 !-- the data in order to avoid sending the data type log) 279 ALLOCATE( pe_max( SIZE( log_point ) ) ) 280 pe_max = log_point%sum 281 CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, myid, comm2d, & 282 ierr ) 283 #endif 284 285 ENDIF 286 287 ! 288 !-- Write cpu-times 289 IF ( myid == 0 ) THEN 290 ! 291 !-- Re-store sums 292 ALLOCATE( sum( SIZE( log_point ) ) ) 293 WHERE ( log_point%counts /= 0 ) 294 sum = log_point%sum 295 ELSEWHERE 296 sum = -1.0 297 ENDWHERE 298 299 ! 300 !-- Get total time in order to calculate CPU-time per gridpoint and timestep 301 IF ( nr_timesteps_this_run /= 0 ) THEN 302 average_cputime = log_point(1)%sum / REAL( (nx+1) * (ny+1) * nz ) / & 303 REAL( nr_timesteps_this_run ) * 1E6 ! in micro-sec 304 ELSE 305 average_cputime = -1.0 306 ENDIF 307 308 ! 309 !-- Write cpu-times sorted by size 310 CALL check_open( 18 ) 311 #if defined( __parallel ) 312 WRITE ( 18, 100 ) TRIM( run_description_header ), & 313 numprocs * threads_per_task, pdims(1), pdims(2), & 314 threads_per_task, nx+1, ny+1, nz, nr_timesteps_this_run, & 315 average_cputime 316 317 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 108 ) num_acc_per_node 318 WRITE ( 18, 110 ) 319 #else 320 WRITE ( 18, 100 ) TRIM( run_description_header ), & 321 numprocs * threads_per_task, 1, 1, & 322 threads_per_task, nx+1, ny+1, nz, nr_timesteps_this_run, & 323 average_cputime 324 325 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 109 ) num_acc_per_node 326 WRITE ( 18, 110 ) 327 #endif 328 DO 329 ii = MAXLOC( sum ) 330 i = ii(1) 331 IF ( sum(i) /= -1.0 ) THEN 332 WRITE ( 18, 102 ) & 333 log_point(i)%place, log_point(i)%sum, & 334 log_point(i)%sum / log_point(1)%sum * 100.0, & 335 log_point(i)%counts, pe_min(i), pe_max(i), pe_rms(i) 336 sum(i) = -1.0 337 ELSE 338 EXIT 339 ENDIF 340 ENDDO 341 ENDIF 342 343 344 ! 345 !-- The same procedure again for the individual measurements. 346 ! 347 !-- Compute cpu-times in seconds 348 log_point_s%mtime = log_point_s%mtime / norm 349 log_point_s%sum = log_point_s%sum / norm 350 log_point_s%vector = log_point_s%vector / norm 351 WHERE ( log_point_s%counts /= 0 ) 352 log_point_s%mean = log_point_s%sum / log_point_s%counts 353 END WHERE 354 355 ! 356 !-- Collect cpu-times from all PEs and calculate statistics 357 #if defined( __parallel ) 358 ! 359 !-- Set barrier in order to avoid that PE0 receives log_point_s-data 360 !-- while still busy with receiving log_point-data (see above) 361 CALL MPI_BARRIER( comm2d, ierr ) 362 #endif 363 IF ( myid == 0 ) THEN 364 ! 365 !-- Initialize temporary arrays needed for statistics 366 pe_min = log_point_s%sum 367 pe_max = log_point_s%sum ! need to be set in case of 1 PE 368 pe_rms = 0.0 369 370 #if defined( __parallel ) 371 ! 372 !-- Receive data from all PEs 373 DO i = 1, numprocs-1 374 CALL MPI_RECV( pe_max(1), SIZE( log_point ), MPI_REAL, & 375 MPI_ANY_SOURCE, MPI_ANY_TAG, comm2d, status, ierr ) 376 sender = status(MPI_SOURCE) 377 pe_log_points(:,sender) = pe_max 378 ENDDO 379 pe_log_points(:,0) = log_point_s%sum ! Results from PE0 380 ! 381 !-- Calculate mean of all PEs, store it on log_point_s%sum 382 !-- and find minimum and maximum 383 DO iii = 1, SIZE( log_point ) 384 DO i = 1, numprocs-1 385 log_point_s(iii)%sum = log_point_s(iii)%sum + pe_log_points(iii,i) 386 pe_min(iii) = MIN( pe_min(iii), pe_log_points(iii,i) ) 387 pe_max(iii) = MAX( pe_max(iii), pe_log_points(iii,i) ) 388 ENDDO 389 log_point_s(iii)%sum = log_point_s(iii)%sum / numprocs 390 ! 391 !-- Calculate rms 392 DO i = 0, numprocs-1 393 pe_rms(iii) = pe_rms(iii) + ( & 394 pe_log_points(iii,i) - log_point_s(iii)%sum & 395 )**2 396 ENDDO 397 pe_rms(iii) = SQRT( pe_rms(iii) / numprocs ) 398 ENDDO 399 ELSE 400 ! 401 !-- Send data to PE0 (pe_max is used as temporary storage to send 402 !-- the data in order to avoid sending the data type log) 403 pe_max = log_point_s%sum 404 CALL MPI_SEND( pe_max(1), SIZE( log_point ), MPI_REAL, 0, 0, comm2d, & 405 ierr ) 406 #endif 407 408 ENDIF 409 410 ! 411 !-- Write cpu-times 412 IF ( myid == 0 ) THEN 413 ! 414 !-- Re-store sums 415 WHERE ( log_point_s%counts /= 0 ) 416 sum = log_point_s%sum 417 ELSEWHERE 418 sum = -1.0 419 ENDWHERE 420 421 ! 422 !-- Write cpu-times sorted by size 423 WRITE ( 18, 101 ) 424 DO 425 ii = MAXLOC( sum ) 426 i = ii(1) 427 IF ( sum(i) /= -1.0 ) THEN 428 WRITE ( 18, 102 ) & 429 log_point_s(i)%place, log_point_s(i)%sum, & 430 log_point_s(i)%sum / log_point(1)%sum * 100.0, & 431 log_point_s(i)%counts, pe_min(i), pe_max(i), pe_rms(i) 432 sum(i) = -1.0 433 ELSE 434 EXIT 435 ENDIF 436 ENDDO 437 438 ! 439 !-- Output of handling of MPI operations 440 IF ( collective_wait ) THEN 441 WRITE ( 18, 103 ) 442 ELSE 443 WRITE ( 18, 104 ) 444 ENDIF 445 IF ( cpu_log_barrierwait ) WRITE ( 18, 111 ) 446 IF ( synchronous_exchange ) THEN 447 WRITE ( 18, 105 ) 448 ELSE 449 WRITE ( 18, 106 ) 450 ENDIF 451 452 ! 453 !-- Empty lines in order to create a gap to the results of the model 454 !-- continuation runs 455 WRITE ( 18, 107 ) 456 457 ! 458 !-- Unit 18 is not needed anymore 459 CALL close_file( 18 ) 460 461 ENDIF 462 463 464 100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', & 465 &') tasks *',I5,' threads):'// & 466 'gridpoints (x/y/z): ',20X,I5,' * ',I5,' * ',I5/ & 467 'nr of timesteps: ',22X,I6/ & 468 'cpu time per grid point and timestep: ',5X,F8.5,' * 10**-6 s') 469 470 101 FORMAT (/'special measures:'/ & 471 &'-----------------------------------------------------------', & 472 &'--------------------') 473 474 102 FORMAT (A20,2X,F9.3,2X,F7.2,1X,I7,3(1X,F9.3)) 475 103 FORMAT (/'Barriers are set in front of collective operations') 476 104 FORMAT (/'No barriers are set in front of collective operations') 477 105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV') 478 106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV') 479 107 FORMAT (//) 480 108 FORMAT ('Accelerator boards per node: ',14X,I2) 481 109 FORMAT ('Accelerator boards: ',23X,I2) 482 110 FORMAT ('----------------------------------------------------------', & 483 &'------------'//& 484 &'place: mean counts min ', & 485 &' max rms'/ & 486 &' sec. % sec. ', & 487 &' sec. sec.'/ & 488 &'-----------------------------------------------------------', & 489 &'-------------------') 490 111 FORMAT (/'Barriers are set at beginning (start/continue) of measurements') 491 492 END SUBROUTINE cpu_statistics 493 494 END MODULE cpulog
Note: See TracChangeset
for help on using the changeset viewer.