source: palm/trunk/SOURCE/palm.f90 @ 1113

Last change on this file since 1113 was 1113, checked in by raasch, 8 years ago

GPU porting of boundary conditions and routine pres; index bug removec from radiation boundary condition

  • Property svn:keywords set to Id
File size: 10.2 KB
Line 
1 PROGRAM palm
2
3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
20! Current revisions:
21! -----------------
22! openACC statements modified
23!
24! Former revisions:
25! -----------------
26! $Id: palm.f90 1113 2013-03-10 02:48:14Z raasch $
27!
28! 1111 2013-03-08 23:54:10Z raasch
29! openACC statements updated
30!
31! 1092 2013-02-02 11:24:22Z raasch
32! unused variables removed
33!
34! 1036 2012-10-22 13:43:42Z raasch
35! code put under GPL (PALM 3.9)
36!
37! 1015 2012-09-27 09:23:24Z raasch
38! Version number changed from 3.8 to 3.8a.
39! OpenACC statements added + code changes required for GPU optimization
40!
41! 849 2012-03-15 10:35:09Z raasch
42! write_particles renamed lpm_write_restart_file
43!
44! 759 2011-09-15 13:58:31Z raasch
45! Splitting of parallel I/O, cpu measurement for write_3d_binary and opening
46! of unit 14 moved to here
47!
48! 495 2010-03-02 00:40:15Z raasch
49! Particle data for restart runs are only written if write_binary=.T..
50!
51! 215 2008-11-18 09:54:31Z raasch
52! Initialization of coupled runs modified for MPI-1 and moved to external
53! subroutine init_coupling
54!
55! 197 2008-09-16 15:29:03Z raasch
56! Workaround for getting information about the coupling mode
57!
58! 108 2007-08-24 15:10:38Z letzel
59! Get coupling mode from environment variable, change location of debug output
60!
61! 75 2007-03-22 09:54:05Z raasch
62! __vtk directives removed, write_particles is called only in case of particle
63! advection switched on, open unit 9 for debug output,
64! setting of palm version moved from modules to here
65!
66! RCS Log replace by Id keyword, revision history cleaned up
67!
68! Revision 1.10  2006/08/04 14:53:12  raasch
69! Distibution of run description header removed, call of header moved behind
70! init_3d_model
71!
72! Revision 1.2  2001/01/25 07:15:06  raasch
73! Program name changed to PALM, module test_variables removed.
74! Initialization of dvrp logging as well as exit of dvrp moved to new
75! subroutines init_dvrp_logging and close_dvrp (file init_dvrp.f90)
76!
77! Revision 1.1  1997/07/24 11:23:35  raasch
78! Initial revision
79!
80!
81! Description:
82! ------------
83! Large-Eddy Simulation (LES) model for the convective boundary layer,
84! optimized for use on parallel machines (implementation realized using the
85! Message Passing Interface (MPI)). The model can also be run on vector machines
86! (less well optimized) and workstations. Versions for the different types of
87! machines are controlled via cpp-directives.
88! Model runs are only feasible using the ksh-script mrun.
89!------------------------------------------------------------------------------!
90
91
92    USE arrays_3d
93    USE constants
94    USE control_parameters
95    USE cpulog
96    USE dvrp_variables
97    USE grid_variables
98    USE indices
99    USE interfaces
100    USE model_1d
101    USE particle_attributes
102    USE pegrid
103    USE spectrum
104    USE statistics
105
106#if defined( __openacc )
107    USE OPENACC
108#endif
109
110    IMPLICIT NONE
111
112!
113!-- Local variables
114    CHARACTER (LEN=9) ::  time_to_string
115    INTEGER           ::  i
116#if defined( __openacc )
117    REAL, DIMENSION(100) ::  acc_dum
118#endif
119
120    version = 'PALM 3.9'
121
122#if defined( __parallel )
123!
124!-- MPI initialisation. comm2d is preliminary set, because
125!-- it will be defined in init_pegrid but is used before in cpu_log.
126    CALL MPI_INIT( ierr )
127    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
128    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
129    comm_palm = MPI_COMM_WORLD
130    comm2d    = MPI_COMM_WORLD
131
132!
133!-- Initialize PE topology in case of coupled runs
134    CALL init_coupling
135#endif
136
137#if defined( __openacc )
138!
139!-- Get the number of accelerator boards per node and assign the MPI processes
140!-- to these boards
141    PRINT*, '*** ACC_DEVICE_NVIDIA = ', ACC_DEVICE_NVIDIA
142    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
143    IF ( numprocs == 1  .AND.  num_acc_per_node > 0 )  num_acc_per_node = 1
144    PRINT*, '*** myid = ', myid, ' num_acc_per_node = ', num_acc_per_node
145    acc_rank = MOD( myid, num_acc_per_node )
146!    STOP '****'
147    CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
148!
149!-- Test output (to be removed later)
150    WRITE (*,'(A,I4,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',&
151                                      acc_rank, ' Devices: ', num_acc_per_node,&
152                                      ' connected to:',                        &
153                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
154#endif
155
156!
157!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
158!-- region
159    !$acc data copyin( acc_dum )
160
161!
162!-- Initialize measuring of the CPU-time remaining to the run
163    CALL local_tremain_ini
164
165!
166!-- Start of total CPU time measuring.
167    CALL cpu_log( log_point(1), 'total', 'start' )
168    CALL cpu_log( log_point(2), 'initialisation', 'start' )
169
170!
171!-- Open a file for debug output
172    WRITE (myid_char,'(''_'',I4.4)')  myid
173    OPEN( 9, FILE='DEBUG'//TRIM( coupling_char )//myid_char, FORM='FORMATTED' )
174
175!
176!-- Initialize dvrp logging. Also, one PE maybe split from the global
177!-- communicator for doing the dvrp output. In that case, the number of
178!-- PEs available for PALM is reduced by one and communicator comm_palm
179!-- is changed respectively.
180#if defined( __parallel )
181    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
182!
183!-- TEST OUTPUT (TO BE REMOVED)
184    WRITE(9,*) '*** coupling_mode = "', TRIM( coupling_mode ), '"'
185    CALL LOCAL_FLUSH( 9 )
186    IF ( TRIM( coupling_mode ) /= 'uncoupled' )  THEN
187       PRINT*, '*** PE', myid, ' Global target PE:', target_id, &
188               TRIM( coupling_mode )
189    ENDIF
190#endif
191
192    CALL init_dvrp_logging
193
194!
195!-- Read control parameters from NAMELIST files and read environment-variables
196    CALL parin
197
198!
199!-- Determine processor topology and local array indices
200    CALL init_pegrid
201
202!
203!-- Generate grid parameters
204    CALL init_grid
205
206!
207!-- Check control parameters and deduce further quantities
208    CALL check_parameters
209
210
211!
212!-- Initialize all necessary variables
213    CALL init_3d_model
214
215!
216!-- Output of program header
217    IF ( myid == 0 )  CALL header
218
219    CALL cpu_log( log_point(2), 'initialisation', 'stop' )
220
221!
222!-- Set start time in format hh:mm:ss
223    simulated_time_chr = time_to_string( simulated_time )
224
225!
226!-- If required, output of initial arrays
227    IF ( do2d_at_begin )  THEN
228       CALL data_output_2d( 'xy', 0 )
229       CALL data_output_2d( 'xz', 0 )
230       CALL data_output_2d( 'yz', 0 )
231    ENDIF
232    IF ( do3d_at_begin )  THEN
233       CALL data_output_3d( 0 )
234    ENDIF
235
236!
237!-- Declare and initialize variables in the accelerator memory with their
238!-- host values
239    !$acc  data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p )          &
240    !$acc       copyin( tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ug, u_init, vg, v_init, zu, zw )   &
241    !$acc       copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst, us, usws, uswst, vsws, vswst, z0, z0h )      &
242    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
243    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
244    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
245    !$acc       copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 )  &
246    !$acc       copyin( weight_pres, weight_substep )
247!
248!-- Integration of the model equations using timestep-scheme
249    CALL time_integration
250
251!
252!-- If required, write binary data for restart runs
253    IF ( write_binary(1:4) == 'true' )  THEN
254
255       CALL cpu_log( log_point(22), 'write_3d_binary', 'start' )
256
257       CALL check_open( 14 )
258
259       DO  i = 0, io_blocks-1
260          IF ( i == io_group )  THEN
261!
262!--          Write flow field data
263             CALL write_3d_binary
264          ENDIF
265#if defined( __parallel )
266          CALL MPI_BARRIER( comm2d, ierr )
267#endif
268       ENDDO
269
270       CALL cpu_log( log_point(22), 'write_3d_binary', 'stop' )
271
272!
273!--    If required, write particle data
274       IF ( particle_advection )  CALL lpm_write_restart_file
275    ENDIF
276
277!
278!-- If required, repeat output of header including the required CPU-time
279    IF ( myid == 0 )  CALL header
280!
281!-- If required, final user-defined actions, and
282!-- last actions on the open files and close files. Unit 14 was opened
283!-- in write_3d_binary but it is closed here, to allow writing on this
284!-- unit in routine user_last_actions.
285    CALL cpu_log( log_point(4), 'last actions', 'start' )
286    DO  i = 0, io_blocks-1
287       IF ( i == io_group )  THEN
288          CALL user_last_actions
289          IF ( write_binary(1:4) == 'true' )  CALL close_file( 14 )
290       ENDIF
291#if defined( __parallel )
292       CALL MPI_BARRIER( comm2d, ierr )
293#endif
294    ENDDO
295    CALL close_file( 0 )
296    CALL close_dvrp
297    CALL cpu_log( log_point(4), 'last actions', 'stop' )
298
299#if defined( __mpi2 )
300!
301!-- Test exchange via intercommunicator in case of a MPI-2 coupling
302    IF ( coupling_mode == 'atmosphere_to_ocean' )  THEN
303       i = 12345 + myid
304       CALL MPI_SEND( i, 1, MPI_INTEGER, myid, 11, comm_inter, ierr )
305    ELSEIF ( coupling_mode == 'ocean_to_atmosphere' )  THEN
306       CALL MPI_RECV( i, 1, MPI_INTEGER, myid, 11, comm_inter, status, ierr )
307       PRINT*, '### myid: ', myid, '   received from atmosphere:  i = ', i
308    ENDIF
309#endif
310
311!
312!-- Close the OpenACC dummy data region
313    !$acc end data
314    !$acc end data
315
316!
317!-- Take final CPU-time for CPU-time analysis
318    CALL cpu_log( log_point(1), 'total', 'stop' )
319    CALL cpu_statistics
320
321#if defined( __parallel )
322    CALL MPI_FINALIZE( ierr )
323#endif
324
325 END PROGRAM palm
Note: See TracBrowser for help on using the repository browser.