source: palm/trunk/SOURCE/palm.f90 @ 1762

Last change on this file since 1762 was 1762, checked in by hellstea, 8 years ago

Introduction of nested domain system

  • Property svn:keywords set to Id
File size: 14.4 KB
RevLine 
[1682]1!> @file palm.f90
[1036]2!--------------------------------------------------------------------------------!
3! This file is part of PALM.
4!
5! PALM is free software: you can redistribute it and/or modify it under the terms
6! of the GNU General Public License as published by the Free Software Foundation,
7! either version 3 of the License, or (at your option) any later version.
8!
9! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
10! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
12!
13! You should have received a copy of the GNU General Public License along with
14! PALM. If not, see <http://www.gnu.org/licenses/>.
15!
[1310]16! Copyright 1997-2014 Leibniz Universitaet Hannover
[1036]17!--------------------------------------------------------------------------------!
18!
[484]19! Current revisions:
[1]20! -----------------
[1762]21! Introduction of nested domain feature
[1748]22!
[1321]23! Former revisions:
24! -----------------
25! $Id: palm.f90 1762 2016-02-25 12:31:13Z hellstea $
26!
[1748]27! 1747 2016-02-08 12:25:53Z raasch
28! OpenACC-adjustment for new surface layer parameterization
29!
[1683]30! 1682 2015-10-07 23:56:08Z knoop
31! Code annotations made doxygen readable
32!
[1669]33! 1668 2015-09-23 13:45:36Z raasch
34! warning replaced by abort in case of failed user interface check
35!
[1667]36! 1666 2015-09-23 07:31:10Z raasch
37! check for user's interface version added
38!
[1483]39! 1482 2014-10-18 12:34:45Z raasch
40! adjustments for using CUDA-aware OpenMPI
41!
[1469]42! 1468 2014-09-24 14:06:57Z maronga
43! Adapted for use on up to 6-digit processor cores
44!
[1403]45! 1402 2014-05-09 14:25:13Z raasch
46! location messages added
47!
[1375]48! 1374 2014-04-25 12:55:07Z raasch
49! bugfix: various modules added
50!
[1321]51! 1320 2014-03-20 08:40:49Z raasch
[1320]52! ONLY-attribute added to USE-statements,
53! kind-parameters added to all INTEGER and REAL declaration statements,
54! kinds are defined in new module kinds,
55! old module precision_kind is removed,
56! revision history before 2012 removed,
57! comment fields (!:) to be used for variable explanations added to
58! all variable declaration statements
[77]59!
[1319]60! 1318 2014-03-17 13:35:16Z raasch
61! module interfaces removed
62!
[1242]63! 1241 2013-10-30 11:36:58Z heinze
64! initialization of nudging and large scale forcing from external file
65!
[1222]66! 1221 2013-09-10 08:59:13Z raasch
67! +wall_flags_00, rflags_invers, rflags_s_inner in copyin statement
68!
[1213]69! 1212 2013-08-15 08:46:27Z raasch
70! +tri in copyin statement
71!
[1182]72! 1179 2013-06-14 05:57:58Z raasch
73! ref_state added to copyin-list
74!
[1114]75! 1113 2013-03-10 02:48:14Z raasch
76! openACC statements modified
77!
[1112]78! 1111 2013-03-08 23:54:10Z raasch
79! openACC statements updated
80!
[1093]81! 1092 2013-02-02 11:24:22Z raasch
82! unused variables removed
83!
[1037]84! 1036 2012-10-22 13:43:42Z raasch
85! code put under GPL (PALM 3.9)
86!
[1017]87! 1015 2012-09-27 09:23:24Z raasch
[863]88! Version number changed from 3.8 to 3.8a.
[1017]89! OpenACC statements added + code changes required for GPU optimization
[863]90!
[850]91! 849 2012-03-15 10:35:09Z raasch
92! write_particles renamed lpm_write_restart_file
93!
[1]94! Revision 1.1  1997/07/24 11:23:35  raasch
95! Initial revision
96!
97!
98! Description:
99! ------------
[1682]100!> Large-Eddy Simulation (LES) model for the convective boundary layer,
101!> optimized for use on parallel machines (implementation realized using the
102!> Message Passing Interface (MPI)). The model can also be run on vector machines
103!> (less well optimized) and workstations. Versions for the different types of
104!> machines are controlled via cpp-directives.
105!> Model runs are only feasible using the ksh-script mrun.
[1]106!------------------------------------------------------------------------------!
PROGRAM palm

!
!-- Main program: initializes MPI (and, if compiled in, the nested-domain
!-- coupling and the OpenACC devices), reads and checks the control parameters,
!-- initializes the model, runs the time integration, optionally writes restart
!-- data, and finally outputs the CPU-time statistics.

    USE arrays_3d

    USE control_parameters,                                                    &
        ONLY:  coupling_char, coupling_mode, do2d_at_begin, do3d_at_begin,     &
               io_blocks, io_group, large_scale_forcing, message_string,       &
               nest_domain, nudging, simulated_time, simulated_time_chr,       &
               user_interface_current_revision,                                &
               user_interface_required_revision, version, wall_heatflux,       &
               write_binary

    USE cpulog,                                                                &
        ONLY:  cpu_log, log_point, cpu_statistics

    USE grid_variables,                                                        &
        ONLY:  fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, wall_e_x, wall_e_y, &
               wall_u, wall_v, wall_w_x, wall_w_y

    USE indices,                                                               &
        ONLY:  ngp_2dh, ngp_2dh_s_inner, nzb_diff_s_inner, nzb_diff_s_outer,   &
               nzb_diff_u, nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner,  &
               nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner,             &
               nzb_w_outer, rflags_invers, rflags_s_inner, wall_flags_0,       &
               wall_flags_00

    USE kinds

    USE ls_forcing_mod,                                                        &
        ONLY:  init_ls_forcing

    USE nudge_mod,                                                             &
        ONLY:  init_nudge

    USE particle_attributes,                                                   &
        ONLY:  particle_advection

    USE pegrid

#if defined( PMC_ACTIVE )
    USE pmc_interface,                                                         &
        ONLY:  cpl_id, pmci_init, pmci_modelconfiguration
#endif

    USE statistics,                                                            &
        ONLY:  hom, rmask, weight_pres, weight_substep

    USE surface_layer_fluxes_mod,                                              &
        ONLY:  pt1, qv1, uv_total

#if defined( __openacc )
    USE OPENACC
#endif

    IMPLICIT NONE

!
!-- Local variables
    CHARACTER(LEN=9)  ::  time_to_string  !< external function, returns the given time as character string
    CHARACTER(LEN=10) ::  env_string      !< to store string of environment var
    INTEGER(iwp)      ::  env_stat        !< to hold status of GET_ENV
    INTEGER(iwp)      ::  i               !< loop index over the I/O blocks (also test value in the MPI-2 exchange)
    INTEGER(iwp)      ::  io_stat         !< status of the internal READ from env_string
    INTEGER(iwp)      ::  myid_openmpi    !< OpenMPI local rank for CUDA aware MPI
#if defined( __openacc )
    REAL(wp), DIMENSION(100) ::  acc_dum     !< dummy array copied to the device to attach the GPU
#endif

    version = 'PALM 4.0'
    user_interface_required_revision = 'r1663'

#if defined( __parallel )
!
!-- MPI initialisation. comm2d is preliminary set, because
!-- it will be defined in init_pegrid but is used before in cpu_log.
    CALL MPI_INIT( ierr )

#if defined( PMC_ACTIVE )
!
!-- Initialize the coupling for nested-domain runs
    CALL pmci_init( comm_palm )
    comm2d = comm_palm

!
!-- Domains with coupler id >= 2 are flagged as nest domains and get the
!-- suffix "_<cpl_id>" (also used for the name of the debug file opened below)
    IF ( cpl_id >= 2 )  THEN
       nest_domain = .TRUE.
       WRITE( coupling_char, '(A1,I1.1)') '_', cpl_id
    ENDIF

    CALL MPI_COMM_SIZE( comm_palm, numprocs, ierr )
    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
#else
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
    comm_palm = MPI_COMM_WORLD
    comm2d    = MPI_COMM_WORLD
!
!-- Initialize PE topology in case of coupled atmosphere-ocean runs (comm_palm
!-- will be split in init_coupling)
    CALL init_coupling
#endif
#endif

#if defined( __openacc )
!
!-- Get the local MPI rank in case of CUDA aware OpenMPI. Important, if there
!-- is more than one accelerator board on the node.
!-- The complete blank-padded string is parsed (blanks are ignored), so that
!-- local ranks with more than one digit are read correctly. If the variable is
!-- not set (blank string) or cannot be parsed, local rank 0 is used.
    CALL GET_ENVIRONMENT_VARIABLE('OMPI_COMM_WORLD_LOCAL_RANK',                &
         VALUE=env_string, STATUS=env_stat )
    myid_openmpi = 0
    READ( env_string, '(BN,I10)', IOSTAT=io_stat )  myid_openmpi
    IF ( io_stat /= 0 )  myid_openmpi = 0
    PRINT*, '### local_rank = ', myid_openmpi, '  status=',env_stat
!
!-- Get the number of accelerator boards per node and assign the MPI processes
!-- to these boards
    PRINT*, '*** ACC_DEVICE_NVIDIA = ', ACC_DEVICE_NVIDIA
    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
    IF ( numprocs == 1  .AND.  num_acc_per_node > 0 )  num_acc_per_node = 1
    PRINT*, '*** myid = ', myid_openmpi, ' num_acc_per_node = ', num_acc_per_node
!
!-- NOTE(review): num_acc_per_node = 0 (no device found) is not handled here
!-- and would lead to MOD with a zero divisor - confirm intended behaviour
    acc_rank = MOD( myid_openmpi, num_acc_per_node )
    CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
!
!-- Test output (to be removed later)
    WRITE (*,'(A,I6,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid_openmpi,   &
                                      ' to CPU ', acc_rank, ' Devices: ',      &
                                      num_acc_per_node, ' connected to:',      &
                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
#endif

!
!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
!-- region
    !$acc data copyin( acc_dum )

!
!-- Initialize measuring of the CPU-time remaining to the run
    CALL local_tremain_ini

!
!-- Start of total CPU time measuring.
    CALL cpu_log( log_point(1), 'total', 'start' )
    CALL cpu_log( log_point(2), 'initialisation', 'start' )

!
!-- Open a file for debug output
    WRITE (myid_char,'(''_'',I6.6)')  myid
    OPEN( 9, FILE='DEBUG'//TRIM( coupling_char )//myid_char, FORM='FORMATTED' )

!
!-- Initialize dvrp logging. Also, one PE maybe split from the global
!-- communicator for doing the dvrp output. In that case, the number of
!-- PEs available for PALM is reduced by one and communicator comm_palm
!-- is changed respectively.
#if defined( __parallel )
    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
!
!-- TEST OUTPUT (TO BE REMOVED)
    WRITE(9,*) '*** coupling_mode = "', TRIM( coupling_mode ), '"'
    CALL LOCAL_FLUSH( 9 )
    IF ( TRIM( coupling_mode ) /= 'uncoupled' )  THEN
       PRINT*, '*** PE', myid, ' Global target PE:', target_id, &
               TRIM( coupling_mode )
    ENDIF
#endif

    CALL init_dvrp_logging

!
!-- Read control parameters from NAMELIST files and read environment-variables
    CALL parin

!
!-- Check for the user's interface version
    IF ( user_interface_current_revision /= user_interface_required_revision )  &
    THEN
       message_string = 'current user-interface revision "' //                  &
                        TRIM( user_interface_current_revision ) // '" does ' // &
                        'not match the required revision ' //                   &
                        TRIM( user_interface_required_revision )
       CALL message( 'palm', 'PA0169', 1, 2, 0, 6, 0 )
    ENDIF

!
!-- Determine processor topology and local array indices
    CALL init_pegrid

!
!-- Generate grid parameters
    CALL init_grid

!
!-- Initialize nudging if required
    IF ( nudging )  THEN
       CALL init_nudge
    ENDIF

!
!-- Initialize reading of large scale forcing from external file - if required
    IF ( large_scale_forcing )  THEN
       CALL init_ls_forcing
    ENDIF

!
!-- Check control parameters and deduce further quantities
    CALL check_parameters

!
!-- Initialize all necessary variables
    CALL init_3d_model

#if defined( PMC_ACTIVE )
!
!-- Coupling protocol setup for nested-domain runs
    CALL pmci_modelconfiguration
#endif

!
!-- Output of program header
    IF ( myid == 0 )  CALL header

    CALL cpu_log( log_point(2), 'initialisation', 'stop' )

!
!-- Set start time in format hh:mm:ss
    simulated_time_chr = time_to_string( simulated_time )

!
!-- If required, output of initial arrays
    IF ( do2d_at_begin )  THEN
       CALL data_output_2d( 'xy', 0 )
       CALL data_output_2d( 'xz', 0 )
       CALL data_output_2d( 'yz', 0 )
    ENDIF
    IF ( do3d_at_begin )  THEN
       CALL data_output_3d( 0 )
    ENDIF

!
!-- Declare and initialize variables in the accelerator memory with their
!-- host values
    !$acc  data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p )          &
    !$acc       copyin( tri, tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ref_state, ug, u_init, vg, v_init, zu, zw )   &
    !$acc       copyin( hom, ol, pt1, qs, qsws, qswst, qv1, rif_wall, shf, ts, tswst, us, usws, uswst, uv_total, vsws, vswst, z0, z0h )      &
    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
    !$acc       copyin( nzb_w_outer, rflags_invers, rflags_s_inner, rmask, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0, wall_flags_00 )  &
    !$acc       copyin( ngp_2dh, ngp_2dh_s_inner )  &
    !$acc       copyin( weight_pres, weight_substep )
!
!-- Integration of the model equations using timestep-scheme
    CALL time_integration

!
!-- If required, write binary data for restart runs
    IF ( write_binary(1:4) == 'true' )  THEN

       CALL cpu_log( log_point(22), 'write_3d_binary', 'start' )

       CALL location_message( 'writing restart data', .FALSE. )

       CALL check_open( 14 )

!
!--    Only the PEs of one I/O group write at a time; the barrier makes the
!--    groups take turns
       DO  i = 0, io_blocks-1
          IF ( i == io_group )  THEN
!
!--          Write flow field data
             CALL write_3d_binary
          ENDIF
#if defined( __parallel )
          CALL MPI_BARRIER( comm2d, ierr )
#endif
       ENDDO

       CALL location_message( 'finished', .TRUE. )

       CALL cpu_log( log_point(22), 'write_3d_binary', 'stop' )

!
!--    If required, write particle data
       IF ( particle_advection )  CALL lpm_write_restart_file
    ENDIF

!
!-- If required, repeat output of header including the required CPU-time
    IF ( myid == 0 )  CALL header
!
!-- If required, final user-defined actions, and
!-- last actions on the open files and close files. Unit 14 was opened
!-- in write_3d_binary but it is closed here, to allow writing on this
!-- unit in routine user_last_actions.
    CALL cpu_log( log_point(4), 'last actions', 'start' )
    DO  i = 0, io_blocks-1
       IF ( i == io_group )  THEN
          CALL user_last_actions
          IF ( write_binary(1:4) == 'true' )  CALL close_file( 14 )
       ENDIF
#if defined( __parallel )
       CALL MPI_BARRIER( comm2d, ierr )
#endif
    ENDDO
    CALL close_file( 0 )
    CALL close_dvrp
    CALL cpu_log( log_point(4), 'last actions', 'stop' )

#if defined( __mpi2 )
!
!-- Test exchange via intercommunicator in case of a MPI-2 coupling
    IF ( coupling_mode == 'atmosphere_to_ocean' )  THEN
       i = 12345 + myid
       CALL MPI_SEND( i, 1, MPI_INTEGER, myid, 11, comm_inter, ierr )
    ELSEIF ( coupling_mode == 'ocean_to_atmosphere' )  THEN
       CALL MPI_RECV( i, 1, MPI_INTEGER, myid, 11, comm_inter, status, ierr )
       PRINT*, '### myid: ', myid, '   received from atmosphere:  i = ', i
    ENDIF
#endif

!
!-- Close both OpenACC data regions (first the region holding the model
!-- variables, then the dummy region opened at the beginning)
    !$acc end data
    !$acc end data

!
!-- Take final CPU-time for CPU-time analysis
    CALL cpu_log( log_point(1), 'total', 'stop' )
    CALL cpu_statistics

#if defined( __parallel )
    CALL MPI_FINALIZE( ierr )
#endif

END PROGRAM palm
Note: See TracBrowser for help on using the repository browser.