source: palm/trunk/SOURCE/palm.f90 @ 1558

Last change on this file since 1558 was 1539, checked in by raasch, 10 years ago

version update to 4.0

  • Property svn:keywords set to Id
File size: 12.4 KB
RevLine 
[1]1 PROGRAM palm
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
[1310]17! Copyright 1997-2014 Leibniz Universitaet Hannover
[1036]18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1]21! -----------------
[1375]22!
[1483]23!
[1321]24! Former revisions:
25! -----------------
26! $Id: palm.f90 1539 2015-01-28 07:43:05Z suehring $
27!
[1483]28! 1482 2014-10-18 12:34:45Z raasch
29! adjustments for using CUDA-aware OpenMPI
30!
[1469]31! 1468 2014-09-24 14:06:57Z maronga
32! Adapted for use on up to 6-digit processor cores
33!
[1403]34! 1402 2014-05-09 14:25:13Z raasch
35! location messages added
36!
[1375]37! 1374 2014-04-25 12:55:07Z raasch
38! bugfix: various modules added
39!
[1321]40! 1320 2014-03-20 08:40:49Z raasch
[1320]41! ONLY-attribute added to USE-statements,
42! kind-parameters added to all INTEGER and REAL declaration statements,
43! kinds are defined in new module kinds,
44! old module precision_kind is removed,
45! revision history before 2012 removed,
46! comment fields (!:) to be used for variable explanations added to
47! all variable declaration statements
[77]48!
[1319]49! 1318 2014-03-17 13:35:16Z raasch
50! module interfaces removed
51!
[1242]52! 1241 2013-10-30 11:36:58Z heinze
53! initialization of nuding and large scale forcing from external file
54!
[1222]55! 1221 2013-09-10 08:59:13Z raasch
56! +wall_flags_00, rflags_invers, rflags_s_inner in copyin statement
57!
[1213]58! 1212 2013-08-15 08:46:27Z raasch
59! +tri in copyin statement
60!
[1182]61! 1179 2013-06-14 05:57:58Z raasch
62! ref_state added to copyin-list
63!
[1114]64! 1113 2013-03-10 02:48:14Z raasch
65! openACC statements modified
66!
[1112]67! 1111 2013-03-08 23:54:10Z raasch
68! openACC statements updated
69!
[1093]70! 1092 2013-02-02 11:24:22Z raasch
71! unused variables removed
72!
[1037]73! 1036 2012-10-22 13:43:42Z raasch
74! code put under GPL (PALM 3.9)
75!
[1017]76! 1015 2012-09-27 09:23:24Z raasch
[863]77! Version number changed from 3.8 to 3.8a.
[1017]78! OpenACC statements added + code changes required for GPU optimization
[863]79!
[850]80! 849 2012-03-15 10:35:09Z raasch
81! write_particles renamed lpm_write_restart_file
82!
[1]83! Revision 1.1  1997/07/24 11:23:35  raasch
84! Initial revision
85!
86!
87! Description:
88! ------------
89! Large-Eddy Simulation (LES) model for the convective boundary layer,
90! optimized for use on parallel machines (implementation realized using the
91! Message Passing Interface (MPI)). The model can also be run on vector machines
92! (less well optimized) and workstations. Versions for the different types of
93! machines are controlled via cpp-directives.
94! Model runs are only feasible using the ksh-script mrun.
95!------------------------------------------------------------------------------!
96
[1374]97    USE arrays_3d
[1]98
[1320]99    USE control_parameters,                                                    &
100        ONLY:  coupling_char, coupling_mode, do2d_at_begin, do3d_at_begin,     &
101               io_blocks, io_group, large_scale_forcing, nudging,              &
[1374]102               simulated_time, simulated_time_chr, version, wall_heatflux, write_binary
[1320]103
104    USE cpulog,                                                                &
105        ONLY:  cpu_log, log_point, cpu_statistics
106
[1374]107    USE grid_variables,                                                        &
108        ONLY:  fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, wall_e_x, wall_e_y, &
109               wall_u, wall_v, wall_w_x, wall_w_y
110
111    USE indices,                                                               &
112        ONLY:  ngp_2dh, ngp_2dh_s_inner, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u, nzb_diff_v,     &
113               nzb_s_inner, nzb_s_outer, nzb_u_inner, nzb_u_outer, nzb_v_inner,&
114               nzb_v_outer, nzb_w_inner, nzb_w_outer, rflags_invers,           &
115               rflags_s_inner, wall_flags_0, wall_flags_00
116
[1320]117    USE kinds
118
119    USE ls_forcing_mod,                                                        &
120        ONLY:  init_ls_forcing
121
122    USE nudge_mod,                                                             &
123        ONLY:  init_nudge
124
125    USE particle_attributes,                                                   &
126        ONLY:  particle_advection
127
[1]128    USE pegrid
129
[1374]130    USE statistics,                                                            &
131        ONLY:  hom, rmask, weight_pres, weight_substep
132
[1015]133#if defined( __openacc )
134    USE OPENACC
135#endif
136
[1]137    IMPLICIT NONE
138
139!
140!-- Local variables
[1482]141    CHARACTER(LEN=9)  ::  time_to_string  !:
142    CHARACTER(LEN=10) ::  env_string      !: to store string of environment var
143    INTEGER(iwp)      ::  env_stat        !: to hold status of GET_ENV
144    INTEGER(iwp)      ::  i               !:
145    INTEGER(iwp)      ::  myid_openmpi    !: OpenMPI local rank for CUDA aware MPI
[1015]146#if defined( __openacc )
[1320]147    REAL(wp), DIMENSION(100) ::  acc_dum     !:
[1015]148#endif
[1]149
[1539]150    version = 'PALM 4.0'
[75]151
[1]152#if defined( __parallel )
153!
154!-- MPI initialisation. comm2d is preliminary set, because
155!-- it will be defined in init_pegrid but is used before in cpu_log.
156    CALL MPI_INIT( ierr )
157    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
[206]158    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
[1]159    comm_palm = MPI_COMM_WORLD
160    comm2d    = MPI_COMM_WORLD
161
162!
[206]163!-- Initialize PE topology in case of coupled runs
164    CALL init_coupling
[102]165#endif
166
[1015]167#if defined( __openacc )
[102]168!
[1482]169!-- Get the local MPI rank in case of CUDA aware OpenMPI. Important, if there
170!-- is more than one accelerator board on the node
171    CALL GET_ENVIRONMENT_VARIABLE('OMPI_COMM_WORLD_LOCAL_RANK',                &
172         VALUE=env_string, STATUS=env_stat )
173    READ( env_string, '(I1)' )  myid_openmpi
174    PRINT*, '### local_rank = ', myid_openmpi, '  status=',env_stat
175!
[1015]176!-- Get the number of accelerator boards per node and assign the MPI processes
177!-- to these boards
[1092]178    PRINT*, '*** ACC_DEVICE_NVIDIA = ', ACC_DEVICE_NVIDIA
[1015]179    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
[1092]180    IF ( numprocs == 1  .AND.  num_acc_per_node > 0 )  num_acc_per_node = 1
[1482]181    PRINT*, '*** myid = ', myid_openmpi, ' num_acc_per_node = ', num_acc_per_node
182    acc_rank = MOD( myid_openmpi, num_acc_per_node )
[1015]183    CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
184!
185!-- Test output (to be removed later)
[1482]186    WRITE (*,'(A,I6,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid_openmpi,   &
187                                      ' to CPU ', acc_rank, ' Devices: ',      &
188                                      num_acc_per_node, ' connected to:',      &
[1015]189                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
190#endif
[1092]191
[1015]192!
193!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
194!-- region
195    !$acc data copyin( acc_dum )
196
197!
[1]198!-- Initialize measuring of the CPU-time remaining to the run
199    CALL local_tremain_ini
200
201!
202!-- Start of total CPU time measuring.
203    CALL cpu_log( log_point(1), 'total', 'start' )
204    CALL cpu_log( log_point(2), 'initialisation', 'start' )
205
206!
[206]207!-- Open a file for debug output
[1468]208    WRITE (myid_char,'(''_'',I6.6)')  myid
[206]209    OPEN( 9, FILE='DEBUG'//TRIM( coupling_char )//myid_char, FORM='FORMATTED' )
210
211!
[1]212!-- Initialize dvrp logging. Also, one PE maybe split from the global
213!-- communicator for doing the dvrp output. In that case, the number of
214!-- PEs available for PALM is reduced by one and communicator comm_palm
215!-- is changed respectively.
216#if defined( __parallel )
217    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
218!
[102]219!-- TEST OUTPUT (TO BE REMOVED)
220    WRITE(9,*) '*** coupling_mode = "', TRIM( coupling_mode ), '"'
221    CALL LOCAL_FLUSH( 9 )
[215]222    IF ( TRIM( coupling_mode ) /= 'uncoupled' )  THEN
223       PRINT*, '*** PE', myid, ' Global target PE:', target_id, &
224               TRIM( coupling_mode )
225    ENDIF
[102]226#endif
227
[108]228    CALL init_dvrp_logging
229
[102]230!
[108]231!-- Read control parameters from NAMELIST files and read environment-variables
232    CALL parin
233
234!
235!-- Determine processor topology and local array indices
236    CALL init_pegrid
237
238!
[1]239!-- Generate grid parameters
240    CALL init_grid
241
242!
[1241]243!-- Initialize nudging if required
244    IF ( nudging )  THEN
245       CALL init_nudge
246    ENDIF
247
248!
249!-- Initialize reading of large scale forcing from external file - if required
250    IF ( large_scale_forcing )  THEN
251       CALL init_ls_forcing
252    ENDIF
253
254!
[1]255!-- Check control parameters and deduce further quantities
256    CALL check_parameters
257
258!
259!-- Initialize all necessary variables
260    CALL init_3d_model
261
262!
263!-- Output of program header
264    IF ( myid == 0 )  CALL header
265
266    CALL cpu_log( log_point(2), 'initialisation', 'stop' )
267
268!
269!-- Set start time in format hh:mm:ss
270    simulated_time_chr = time_to_string( simulated_time )
271
272!
273!-- If required, output of initial arrays
274    IF ( do2d_at_begin )  THEN
275       CALL data_output_2d( 'xy', 0 )
276       CALL data_output_2d( 'xz', 0 )
277       CALL data_output_2d( 'yz', 0 )
278    ENDIF
279    IF ( do3d_at_begin )  THEN
280       CALL data_output_3d( 0 )
281    ENDIF
282
283!
[1015]284!-- Declare and initialize variables in the accelerator memory with their
285!-- host values
[1113]286    !$acc  data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p )          &
[1212]287    !$acc       copyin( tri, tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ref_state, ug, u_init, vg, v_init, zu, zw )   &
[1015]288    !$acc       copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst, us, usws, uswst, vsws, vswst, z0, z0h )      &
289    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
290    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
291    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
[1221]292    !$acc       copyin( nzb_w_outer, rflags_invers, rflags_s_inner, rmask, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0, wall_flags_00 )  &
293    !$acc       copyin( ngp_2dh, ngp_2dh_s_inner )  &
[1113]294    !$acc       copyin( weight_pres, weight_substep )
[1015]295!
[495]296!-- Integration of the model equations using timestep-scheme
[1]297    CALL time_integration
298
299!
[495]300!-- If required, write binary data for restart runs
301    IF ( write_binary(1:4) == 'true' )  THEN
[759]302
303       CALL cpu_log( log_point(22), 'write_3d_binary', 'start' )
304
[1402]305       CALL location_message( 'writing restart data', .FALSE. )
306
[759]307       CALL check_open( 14 )
308
309       DO  i = 0, io_blocks-1
310          IF ( i == io_group )  THEN
[1]311!
[759]312!--          Write flow field data
313             CALL write_3d_binary
314          ENDIF
315#if defined( __parallel )
316          CALL MPI_BARRIER( comm2d, ierr )
317#endif
318       ENDDO
319
[1402]320       CALL location_message( 'finished', .TRUE. )
321
[759]322       CALL cpu_log( log_point(22), 'write_3d_binary', 'stop' )
323
[495]324!
325!--    If required, write particle data
[849]326       IF ( particle_advection )  CALL lpm_write_restart_file
[495]327    ENDIF
[1]328
329!
330!-- If required, repeat output of header including the required CPU-time
331    IF ( myid == 0 )  CALL header
332!
333!-- If required, final user-defined actions, and
334!-- last actions on the open files and close files. Unit 14 was opened
335!-- in write_3d_binary but it is closed here, to allow writing on this
336!-- unit in routine user_last_actions.
337    CALL cpu_log( log_point(4), 'last actions', 'start' )
[759]338    DO  i = 0, io_blocks-1
339       IF ( i == io_group )  THEN
340          CALL user_last_actions
341          IF ( write_binary(1:4) == 'true' )  CALL close_file( 14 )
342       ENDIF
343#if defined( __parallel )
344       CALL MPI_BARRIER( comm2d, ierr )
345#endif
346    ENDDO
[1]347    CALL close_file( 0 )
348    CALL close_dvrp
349    CALL cpu_log( log_point(4), 'last actions', 'stop' )
350
[102]351#if defined( __mpi2 )
[1]352!
[206]353!-- Test exchange via intercommunicator in case of a MPI-2 coupling
[102]354    IF ( coupling_mode == 'atmosphere_to_ocean' )  THEN
355       i = 12345 + myid
356       CALL MPI_SEND( i, 1, MPI_INTEGER, myid, 11, comm_inter, ierr )
357    ELSEIF ( coupling_mode == 'ocean_to_atmosphere' )  THEN
358       CALL MPI_RECV( i, 1, MPI_INTEGER, myid, 11, comm_inter, status, ierr )
359       PRINT*, '### myid: ', myid, '   received from atmosphere:  i = ', i
360    ENDIF
361#endif
362
363!
[1015]364!-- Close the OpenACC dummy data region
365    !$acc end data
366    !$acc end data
367
368!
[1]369!-- Take final CPU-time for CPU-time analysis
370    CALL cpu_log( log_point(1), 'total', 'stop' )
371    CALL cpu_statistics
372
373#if defined( __parallel )
374    CALL MPI_FINALIZE( ierr )
375#endif
376
377 END PROGRAM palm
Note: See TracBrowser for help on using the repository browser.