source: palm/trunk/SOURCE/check_for_restart.f90 @ 1797

Last change on this file since 1797 was 1797, checked in by raasch, 8 years ago

Introduction of different data transfer modes; restart mechanism adjusted for nested runs; parameter consistency checks for nested runs; further formatting cleanup

  • Property svn:keywords set to Id
File size: 10.4 KB
RevLine 
[1682]1!> @file check_for_restart.f90
[1036]2!--------------------------------------------------------------------------------!
3! This file is part of PALM.
4!
5! PALM is free software: you can redistribute it and/or modify it under the terms
6! of the GNU General Public License as published by the Free Software Foundation,
7! either version 3 of the License, or (at your option) any later version.
8!
9! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
10! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
12!
13! You should have received a copy of the GNU General Public License along with
14! PALM. If not, see <http://www.gnu.org/licenses/>.
15!
[1310]16! Copyright 1997-2014 Leibniz Universitaet Hannover
[1036]17!--------------------------------------------------------------------------------!
18!
[247]19! Current revisions:
[1]20! -----------------
[1797]21! check now accounts for nesting mode
[1354]22!
[1321]23! Former revisions:
24! -----------------
25! $Id: check_for_restart.f90 1797 2016-03-21 16:50:28Z raasch $
26!
[1683]27! 1682 2015-10-07 23:56:08Z knoop
28! Code annotations made doxygen readable
29!
[1510]30! 1509 2014-12-16 08:56:46Z heinze
31! bugfix: prevent infinite loop in case of automatic restarts
32!
[1469]33! 1468 2014-09-24 14:06:57Z maronga
34! Added support for unscheduled job termination using the flag files
35! DO_STOP_NOW and DO_RESTART_NOW
36!
[1354]37! 1353 2014-04-08 15:21:23Z heinze
38! REAL constants provided with KIND-attribute
39!
[1321]40! 1320 2014-03-20 08:40:49Z raasch
[1320]41! ONLY-attribute added to USE-statements,
42! kind-parameters added to all INTEGER and REAL declaration statements,
43! kinds are defined in new module kinds,
44! revision history before 2012 removed,
45! comment fields (!:) to be used for variable explanations added to
46! all variable declaration statements
[1]47!
[1037]48! 1036 2012-10-22 13:43:42Z raasch
49! code put under GPL (PALM 3.9)
50!
[1033]51! 1032 2012-10-21 13:03:21Z letzel
52! minor reformatting
53!
[1]54! Revision 1.1  1998/03/18 20:06:51  raasch
55! Initial revision
56!
57!
58! Description:
59! ------------
[1682]60!> Set stop flag, if restart is necessary because of expiring cpu-time or
61!> if it is forced by user
[1]62!------------------------------------------------------------------------------!
SUBROUTINE check_for_restart

!
!-- Decides whether the model run has to be stopped and, if so, whether a
!-- continuation run has to be initiated. Three reasons are checked in turn:
!--   1. the remaining CPU time (from local_tremain) has dropped to
!--      termination_time_needed while binary output is switched on,
!--   2. one of the flag files DO_STOP_NOW / DO_RESTART_NOW exists,
!--   3. the user-defined restart time (time_restart) has been passed.
!-- The result is stored in terminate_run. In coupled runs the remote model is
!-- informed about the reason via terminate_coupled (3: CPU limit, 6: flag
!-- file, 4: time_restart with dt_restart set, 5: time_restart without
!-- dt_restart). Finally the file CONTINUE_RUN is written, unless the stop was
!-- requested via DO_STOP_NOW.

    USE control_parameters,                                                    &
        ONLY:  coupling_mode, dt_restart, end_time, message_string,            &
               run_description_header, simulated_time, terminate_coupled,      &
               terminate_coupled_remote, terminate_run,                        &
               termination_time_needed, time_restart,                          &
               time_since_reference_point, write_binary

    USE kinds

    USE pegrid

    USE pmc_interface,                                                         &
        ONLY:  comm_world_nesting, cpl_id, nested_run

    IMPLICIT NONE

!
!-- Value of time_restart / dt_restart that this routine treats as "not set"
    REAL(wp), PARAMETER ::  time_not_set = 9999999.9_wp

    INTEGER ::  global_communicator  !< global communicator to be used here

!
!-- The flags are deliberately not initialized in their declaration, because
!-- that would give them the SAVE attribute. They are reset at every call.
    LOGICAL ::  do_restart_now       !< flag file DO_RESTART_NOW exists
    LOGICAL ::  do_stop_now          !< flag file DO_STOP_NOW exists
    LOGICAL ::  terminate_run_l      !< PE-local stop request

    REAL(wp) ::  remaining_time      !< value returned by local_tremain


    do_restart_now = .FALSE.
    do_stop_now    = .FALSE.

!
!-- Check remaining CPU-time
    CALL local_tremain( remaining_time )

!
!-- If necessary set a flag to stop the model run
    terminate_run_l = ( remaining_time <= termination_time_needed  .AND.       &
                        write_binary(1:4) == 'true' )

!
!-- Set the global communicator to be used (depends on the mode in which PALM is
!-- running)
    IF ( nested_run )  THEN
       global_communicator = comm_world_nesting
    ELSE
       global_communicator = comm2d
    ENDIF

#if defined( __parallel )
!
!-- Make a logical OR for all processes. Stop the model run if at least
!-- one process has reached the time limit.
    IF ( collective_wait )  CALL MPI_BARRIER( global_communicator, ierr )
    CALL MPI_ALLREDUCE( terminate_run_l, terminate_run, 1, MPI_LOGICAL,        &
                        MPI_LOR, global_communicator, ierr )
#else
    terminate_run = terminate_run_l
#endif

!
!-- Output that job will be terminated. The '&' characters separate the
!-- individual lines of the message.
    IF ( terminate_run  .AND.  myid == 0 )  THEN
       WRITE( message_string, * ) 'run will be terminated because it is ',     &
                       'running out of job cpu limit & ',                      &
                       'remaining time:         ', remaining_time, ' s & ',    &
                       'termination time needed:', termination_time_needed, ' s'
       CALL message( 'check_for_restart', 'PA0163', 0, 1, 0, 6, 0 )
    ENDIF

!
!-- In case of coupled runs inform the remote model of the termination
!-- and its reason, provided the remote model has not already been
!-- informed of another termination reason (terminate_coupled > 0) before,
!-- or vice versa (terminate_coupled_remote > 0).
    IF ( terminate_run  .AND.  TRIM( coupling_mode ) /= 'uncoupled'  .AND.     &
         terminate_coupled == 0  .AND.  terminate_coupled_remote == 0 )  THEN
       CALL exchange_termination_reason( 3 )
    ENDIF

!
!-- Check if a flag file exists that forces a termination of the model.
!-- Only PE 0 looks for the files; its result is distributed by the
!-- reduction below.
    IF ( myid == 0 )  THEN
       INQUIRE( FILE="DO_STOP_NOW", EXIST=do_stop_now )
       INQUIRE( FILE="DO_RESTART_NOW", EXIST=do_restart_now )

       IF ( do_stop_now  .OR.  do_restart_now )  THEN

          terminate_run_l = .TRUE.

          WRITE( message_string, * ) 'run will be terminated because user ',   &
                                  'forced a job finalization using a flag ',   &
                                  'file:',                                     &
                                  '&DO_STOP_NOW: ', do_stop_now,               &
                                  '&DO_RESTART_NOW: ', do_restart_now
          CALL message( 'check_for_restart', 'PA0398', 0, 0, 0, 6, 0 )

       ENDIF
    ENDIF

#if defined( __parallel )
!
!-- Make a logical OR for all processes. Stop the model run if a flag file has
!-- been detected above.
    IF ( collective_wait )  CALL MPI_BARRIER( global_communicator, ierr )
    CALL MPI_ALLREDUCE( terminate_run_l, terminate_run, 1, MPI_LOGICAL,        &
                        MPI_LOR, global_communicator, ierr )
#else
    terminate_run = terminate_run_l
#endif

!
!-- In case of coupled runs inform the remote model of the termination
!-- and its reason, provided the remote model has not already been
!-- informed of another termination reason (terminate_coupled > 0) before,
!-- or vice versa (terminate_coupled_remote > 0).
    IF ( terminate_run  .AND.  coupling_mode /= 'uncoupled'  .AND.             &
         terminate_coupled == 0  .AND.  terminate_coupled_remote == 0 )  THEN
       CALL exchange_termination_reason( 6 )
    ENDIF

!
!-- Set the stop flag also, if restart is forced by user settings
    IF ( time_restart /= time_not_set  .AND.                                   &
         time_restart < time_since_reference_point )  THEN

!
!--    Restart is not necessary, if the end time of the run (given by
!--    the user) has been reached
       IF ( simulated_time < end_time )  THEN
          terminate_run = .TRUE.
!
!--       Increment restart time, if forced by user, otherwise set restart
!--       time to default (no user restart)
          IF ( dt_restart /= time_not_set )  THEN
             time_restart = time_restart + dt_restart
          ELSE
             time_restart = time_not_set
          ENDIF

          WRITE( message_string, * ) 'run will be terminated due to user ',    &
                                  'settings of',                               &
                                  '&restart_time / dt_restart',                &
                                  '&new restart time is: ', time_restart, ' s'
          CALL message( 'check_for_restart', 'PA0164', 0, 0, 0, 6, 0 )

!
!--       In case of coupled runs inform the remote model of the termination
!--       and its reason, provided the remote model has not already been
!--       informed of another termination reason (terminate_coupled > 0) before,
!--       or vice versa (terminate_coupled_remote > 0).
          IF ( coupling_mode /= 'uncoupled'  .AND.  terminate_coupled == 0     &
               .AND.  terminate_coupled_remote == 0 )  THEN

             IF ( dt_restart /= time_not_set )  THEN
                CALL exchange_termination_reason( 4 )
             ELSE
                CALL exchange_termination_reason( 5 )
             ENDIF

          ENDIF
       ELSE
!
!--       End time has been reached: switch off any further user restart
          time_restart = time_not_set
       ENDIF
    ENDIF

!
!-- If the run is stopped, set a flag file which is necessary to initiate
!-- the start of a continuation run, except if the user forced to stop the
!-- run without restart. The file is written by PE 0 of the model with
!-- cpl_id == 1 only (presumably the root model of a nested run - to be
!-- confirmed against pmc_interface).
    IF ( terminate_run  .AND.  myid == 0  .AND.  cpl_id == 1  .AND.            &
         .NOT. do_stop_now )  THEN

       OPEN ( 90, FILE='CONTINUE_RUN', FORM='FORMATTED' )
       WRITE ( 90, '(A)' )  TRIM( run_description_header )
       CLOSE ( 90 )

    ENDIF


CONTAINS

!
!-- Store the termination reason in terminate_coupled and, in parallel builds,
!-- swap it with the remote model: PE 0 exchanges the value via comm_inter and
!-- the received terminate_coupled_remote is then broadcast within comm2d.
    SUBROUTINE exchange_termination_reason( reason )

       INTEGER, INTENT(IN) ::  reason  !< value to be assigned to terminate_coupled

       terminate_coupled = reason

#if defined( __parallel )
       IF ( myid == 0 )  THEN
          CALL MPI_SENDRECV( terminate_coupled,        1, MPI_INTEGER,         &
                             target_id, 0,                                     &
                             terminate_coupled_remote, 1, MPI_INTEGER,         &
                             target_id, 0,                                     &
                             comm_inter, status, ierr )
       ENDIF
       CALL MPI_BCAST( terminate_coupled_remote, 1, MPI_INTEGER, 0, comm2d,    &
                       ierr )
#endif

    END SUBROUTINE exchange_termination_reason

END SUBROUTINE check_for_restart
Note: See TracBrowser for help on using the repository browser.