source: palm/trunk/SOURCE/palm.f90 @ 1212

Last change on this file since 1212 was 1212, checked in by raasch, 8 years ago

tridia-solver moved to seperate module; the tridiagonal matrix coefficients of array tri are calculated only once at the beginning

  • Property svn:keywords set to Id
File size: 10.3 KB
Line 
1 PROGRAM palm
2
3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
20! Current revisions:
21! -----------------
22! +tri in copyin statement
23!
24! Former revisions:
25! -----------------
26! $Id: palm.f90 1212 2013-08-15 08:46:27Z raasch $
27!
28! 1179 2013-06-14 05:57:58Z raasch
29! ref_state added to copyin-list
30!
31! 1113 2013-03-10 02:48:14Z raasch
32! openACC statements modified
33!
34! 1111 2013-03-08 23:54:10Z raasch
35! openACC statements updated
36!
37! 1092 2013-02-02 11:24:22Z raasch
38! unused variables removed
39!
40! 1036 2012-10-22 13:43:42Z raasch
41! code put under GPL (PALM 3.9)
42!
43! 1015 2012-09-27 09:23:24Z raasch
44! Version number changed from 3.8 to 3.8a.
45! OpenACC statements added + code changes required for GPU optimization
46!
47! 849 2012-03-15 10:35:09Z raasch
48! write_particles renamed lpm_write_restart_file
49!
50! 759 2011-09-15 13:58:31Z raasch
51! Splitting of parallel I/O, cpu measurement for write_3d_binary and opening
52! of unit 14 moved to here
53!
54! 495 2010-03-02 00:40:15Z raasch
55! Particle data for restart runs are only written if write_binary=.T..
56!
57! 215 2008-11-18 09:54:31Z raasch
58! Initialization of coupled runs modified for MPI-1 and moved to external
59! subroutine init_coupling
60!
61! 197 2008-09-16 15:29:03Z raasch
62! Workaround for getting information about the coupling mode
63!
64! 108 2007-08-24 15:10:38Z letzel
65! Get coupling mode from environment variable, change location of debug output
66!
67! 75 2007-03-22 09:54:05Z raasch
68! __vtk directives removed, write_particles is called only in case of particle
69! advection switched on, open unit 9 for debug output,
70! setting of palm version moved from modules to here
71!
72! RCS Log replace by Id keyword, revision history cleaned up
73!
74! Revision 1.10  2006/08/04 14:53:12  raasch
75! Distibution of run description header removed, call of header moved behind
76! init_3d_model
77!
78! Revision 1.2  2001/01/25 07:15:06  raasch
79! Program name changed to PALM, module test_variables removed.
80! Initialization of dvrp logging as well as exit of dvrp moved to new
81! subroutines init_dvrp_logging and close_dvrp (file init_dvrp.f90)
82!
83! Revision 1.1  1997/07/24 11:23:35  raasch
84! Initial revision
85!
86!
87! Description:
88! ------------
89! Large-Eddy Simulation (LES) model for the convective boundary layer,
90! optimized for use on parallel machines (implementation realized using the
91! Message Passing Interface (MPI)). The model can also be run on vector machines
92! (less well optimized) and workstations. Versions for the different types of
93! machines are controlled via cpp-directives.
94! Model runs are only feasible using the ksh-script mrun.
95!------------------------------------------------------------------------------!
96
97
98    USE arrays_3d
99    USE constants
100    USE control_parameters
101    USE cpulog
102    USE dvrp_variables
103    USE grid_variables
104    USE indices
105    USE interfaces
106    USE model_1d
107    USE particle_attributes
108    USE pegrid
109    USE spectrum
110    USE statistics
111
112#if defined( __openacc )
113    USE OPENACC
114#endif
115
116    IMPLICIT NONE
117
118!
119!-- Local variables
120    CHARACTER (LEN=9) ::  time_to_string
121    INTEGER           ::  i
122#if defined( __openacc )
123    REAL, DIMENSION(100) ::  acc_dum
124#endif
125
126    version = 'PALM 3.9'
127
128#if defined( __parallel )
129!
130!-- MPI initialisation. comm2d is preliminary set, because
131!-- it will be defined in init_pegrid but is used before in cpu_log.
132    CALL MPI_INIT( ierr )
133    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
134    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
135    comm_palm = MPI_COMM_WORLD
136    comm2d    = MPI_COMM_WORLD
137
138!
139!-- Initialize PE topology in case of coupled runs
140    CALL init_coupling
141#endif
142
143#if defined( __openacc )
144!
145!-- Get the number of accelerator boards per node and assign the MPI processes
146!-- to these boards
147    PRINT*, '*** ACC_DEVICE_NVIDIA = ', ACC_DEVICE_NVIDIA
148    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
149    IF ( numprocs == 1  .AND.  num_acc_per_node > 0 )  num_acc_per_node = 1
150    PRINT*, '*** myid = ', myid, ' num_acc_per_node = ', num_acc_per_node
151    acc_rank = MOD( myid, num_acc_per_node )
152!    STOP '****'
153    CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
154!
155!-- Test output (to be removed later)
156    WRITE (*,'(A,I4,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',&
157                                      acc_rank, ' Devices: ', num_acc_per_node,&
158                                      ' connected to:',                        &
159                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
160#endif
161
162!
163!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
164!-- region
165    !$acc data copyin( acc_dum )
166
167!
168!-- Initialize measuring of the CPU-time remaining to the run
169    CALL local_tremain_ini
170
171!
172!-- Start of total CPU time measuring.
173    CALL cpu_log( log_point(1), 'total', 'start' )
174    CALL cpu_log( log_point(2), 'initialisation', 'start' )
175
176!
177!-- Open a file for debug output
178    WRITE (myid_char,'(''_'',I4.4)')  myid
179    OPEN( 9, FILE='DEBUG'//TRIM( coupling_char )//myid_char, FORM='FORMATTED' )
180
181!
182!-- Initialize dvrp logging. Also, one PE maybe split from the global
183!-- communicator for doing the dvrp output. In that case, the number of
184!-- PEs available for PALM is reduced by one and communicator comm_palm
185!-- is changed respectively.
186#if defined( __parallel )
187    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
188!
189!-- TEST OUTPUT (TO BE REMOVED)
190    WRITE(9,*) '*** coupling_mode = "', TRIM( coupling_mode ), '"'
191    CALL LOCAL_FLUSH( 9 )
192    IF ( TRIM( coupling_mode ) /= 'uncoupled' )  THEN
193       PRINT*, '*** PE', myid, ' Global target PE:', target_id, &
194               TRIM( coupling_mode )
195    ENDIF
196#endif
197
198    CALL init_dvrp_logging
199
200!
201!-- Read control parameters from NAMELIST files and read environment-variables
202    CALL parin
203
204!
205!-- Determine processor topology and local array indices
206    CALL init_pegrid
207
208!
209!-- Generate grid parameters
210    CALL init_grid
211
212!
213!-- Check control parameters and deduce further quantities
214    CALL check_parameters
215
216
217!
218!-- Initialize all necessary variables
219    CALL init_3d_model
220
221!
222!-- Output of program header
223    IF ( myid == 0 )  CALL header
224
225    CALL cpu_log( log_point(2), 'initialisation', 'stop' )
226
227!
228!-- Set start time in format hh:mm:ss
229    simulated_time_chr = time_to_string( simulated_time )
230
231!
232!-- If required, output of initial arrays
233    IF ( do2d_at_begin )  THEN
234       CALL data_output_2d( 'xy', 0 )
235       CALL data_output_2d( 'xz', 0 )
236       CALL data_output_2d( 'yz', 0 )
237    ENDIF
238    IF ( do3d_at_begin )  THEN
239       CALL data_output_3d( 0 )
240    ENDIF
241
242!
243!-- Declare and initialize variables in the accelerator memory with their
244!-- host values
245    !$acc  data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p )          &
246    !$acc       copyin( tri, tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ref_state, ug, u_init, vg, v_init, zu, zw )   &
247    !$acc       copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst, us, usws, uswst, vsws, vswst, z0, z0h )      &
248    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
249    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
250    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
251    !$acc       copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 )  &
252    !$acc       copyin( weight_pres, weight_substep )
253!
254!-- Integration of the model equations using timestep-scheme
255    CALL time_integration
256
257!
258!-- If required, write binary data for restart runs
259    IF ( write_binary(1:4) == 'true' )  THEN
260
261       CALL cpu_log( log_point(22), 'write_3d_binary', 'start' )
262
263       CALL check_open( 14 )
264
265       DO  i = 0, io_blocks-1
266          IF ( i == io_group )  THEN
267!
268!--          Write flow field data
269             CALL write_3d_binary
270          ENDIF
271#if defined( __parallel )
272          CALL MPI_BARRIER( comm2d, ierr )
273#endif
274       ENDDO
275
276       CALL cpu_log( log_point(22), 'write_3d_binary', 'stop' )
277
278!
279!--    If required, write particle data
280       IF ( particle_advection )  CALL lpm_write_restart_file
281    ENDIF
282
283!
284!-- If required, repeat output of header including the required CPU-time
285    IF ( myid == 0 )  CALL header
286!
287!-- If required, final user-defined actions, and
288!-- last actions on the open files and close files. Unit 14 was opened
289!-- in write_3d_binary but it is closed here, to allow writing on this
290!-- unit in routine user_last_actions.
291    CALL cpu_log( log_point(4), 'last actions', 'start' )
292    DO  i = 0, io_blocks-1
293       IF ( i == io_group )  THEN
294          CALL user_last_actions
295          IF ( write_binary(1:4) == 'true' )  CALL close_file( 14 )
296       ENDIF
297#if defined( __parallel )
298       CALL MPI_BARRIER( comm2d, ierr )
299#endif
300    ENDDO
301    CALL close_file( 0 )
302    CALL close_dvrp
303    CALL cpu_log( log_point(4), 'last actions', 'stop' )
304
305#if defined( __mpi2 )
306!
307!-- Test exchange via intercommunicator in case of a MPI-2 coupling
308    IF ( coupling_mode == 'atmosphere_to_ocean' )  THEN
309       i = 12345 + myid
310       CALL MPI_SEND( i, 1, MPI_INTEGER, myid, 11, comm_inter, ierr )
311    ELSEIF ( coupling_mode == 'ocean_to_atmosphere' )  THEN
312       CALL MPI_RECV( i, 1, MPI_INTEGER, myid, 11, comm_inter, status, ierr )
313       PRINT*, '### myid: ', myid, '   received from atmosphere:  i = ', i
314    ENDIF
315#endif
316
317!
318!-- Close the OpenACC dummy data region
319    !$acc end data
320    !$acc end data
321
322!
323!-- Take final CPU-time for CPU-time analysis
324    CALL cpu_log( log_point(1), 'total', 'stop' )
325    CALL cpu_statistics
326
327#if defined( __parallel )
328    CALL MPI_FINALIZE( ierr )
329#endif
330
331 END PROGRAM palm
Note: See TracBrowser for help on using the repository browser.