source: palm/trunk/SOURCE/palm.f90 @ 1015

Last change on this file since 1015 was 1015, checked in by raasch, 12 years ago

Starting with changes required for GPU optimization. OpenACC statements for using NVIDIA GPUs added.
Adjustment of mixing length to the Prandtl mixing length at first grid point above ground removed.
mask array is set zero for ghost boundaries

  • Property svn:keywords set to Id
File size: 8.9 KB
RevLine 
PROGRAM palm

!------------------------------------------------------------------------------!
! Current revisions:
! -----------------
! OpenACC statements added + code changes required for GPU optimization
!
! Former revisions:
! -----------------
! $Id: palm.f90 1015 2012-09-27 09:23:24Z raasch $
!
! Version number changed from 3.8 to 3.8a.
!
! 849 2012-03-15 10:35:09Z raasch
! write_particles renamed lpm_write_restart_file
!
! 759 2011-09-15 13:58:31Z raasch
! Splitting of parallel I/O, cpu measurement for write_3d_binary and opening
! of unit 14 moved to here
!
! 495 2010-03-02 00:40:15Z raasch
! Particle data for restart runs are only written if write_binary=.T..
!
! 215 2008-11-18 09:54:31Z raasch
! Initialization of coupled runs modified for MPI-1 and moved to external
! subroutine init_coupling
!
! 197 2008-09-16 15:29:03Z raasch
! Workaround for getting information about the coupling mode
!
! 108 2007-08-24 15:10:38Z letzel
! Get coupling mode from environment variable, change location of debug output
!
! 75 2007-03-22 09:54:05Z raasch
! __vtk directives removed, write_particles is called only in case of particle
! advection switched on, open unit 9 for debug output,
! setting of palm version moved from modules to here
!
! RCS Log replaced by Id keyword, revision history cleaned up
!
! Revision 1.10  2006/08/04 14:53:12  raasch
! Distribution of run description header removed, call of header moved behind
! init_3d_model
!
! Revision 1.2  2001/01/25 07:15:06  raasch
! Program name changed to PALM, module test_variables removed.
! Initialization of dvrp logging as well as exit of dvrp moved to new
! subroutines init_dvrp_logging and close_dvrp (file init_dvrp.f90)
!
! Revision 1.1  1997/07/24 11:23:35  raasch
! Initial revision
!
!
! Description:
! ------------
! Large-Eddy Simulation (LES) model for the convective boundary layer,
! optimized for use on parallel machines (implementation realized using the
! Message Passing Interface (MPI)). The model can also be run on vector machines
! (less well optimized) and workstations. Versions for the different types of
! machines are controlled via cpp-directives.
! Model runs are only feasible using the ksh-script mrun.
!
! Sequence of actions carried out by this main program:
!   1. MPI initialisation (and coupling setup) if compiled with __parallel
!   2. Assignment of the MPI task to an accelerator if compiled with __openacc
!   3. Model initialisation (parin, init_pegrid, init_grid, check_parameters,
!      init_3d_model), header output and optional output of initial arrays
!   4. Time integration inside an OpenACC data region
!   5. Optional output of restart data, final user actions, closing of files
!   6. CPU-time statistics and MPI finalisation
!------------------------------------------------------------------------------!


    USE arrays_3d
    USE constants
    USE control_parameters
    USE cpulog
    USE dvrp_variables
    USE grid_variables
    USE indices
    USE interfaces
    USE model_1d
    USE particle_attributes
    USE pegrid
    USE spectrum
    USE statistics

#if defined( __openacc )
    USE OPENACC
#endif

    IMPLICIT NONE

!
!-- Local variables
!-- time_to_string is an external function (declared here because of
!-- IMPLICIT NONE), i is the loop index of the I/O block loops
    CHARACTER (LEN=9) ::  time_to_string
    INTEGER           ::  i
#if defined( __openacc )
!
!-- Dummy array which is only used to open the first OpenACC data region
    REAL, DIMENSION(100) ::  acc_dum
#endif

    version = 'PALM 3.8a'

#if defined( __parallel )
!
!-- MPI initialisation. comm2d is preliminary set, because
!-- it will be defined in init_pegrid but is used before in cpu_log.
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
    comm_palm = MPI_COMM_WORLD
    comm2d    = MPI_COMM_WORLD

!
!-- Initialize PE topology in case of coupled runs
    CALL init_coupling
#endif

#if defined( __openacc )
!
!-- Get the number of accelerator boards per node and assign the MPI processes
!-- to these boards. The assignment is skipped if no board has been found,
!-- because the result of MOD with a zero divisor is processor dependent
!-- (it usually terminates the run with a floating point exception).
    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
    IF ( num_acc_per_node > 0 )  THEN
       acc_rank = MOD( myid, num_acc_per_node )
       CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
    ELSE
       acc_rank = 0
       WRITE (*,'(A,I4,A)') '+++ MPI-Task ', myid,                             &
                            ': no NVIDIA accelerator found, device' //         &
                            ' assignment skipped'
    ENDIF
!
!-- Test output (to be removed later)
    WRITE (*,'(A,I4,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',&
                                      acc_rank, ' Devices: ', num_acc_per_node,&
                                      ' connected to:',                        &
                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
!
!-- Give the dummy array a defined value before it is copied to the device
    acc_dum = 0.0
#endif
!
!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
!-- region. The region is closed by the last "end data" directive at the end
!-- of the program.
    !$acc data copyin( acc_dum )

!
!-- Initialize measuring of the CPU-time remaining to the run
    CALL local_tremain_ini

!
!-- Start of total CPU time measuring.
    CALL cpu_log( log_point(1), 'total', 'start' )
    CALL cpu_log( log_point(2), 'initialisation', 'start' )

!
!-- Open a file for debug output
    WRITE (myid_char,'(''_'',I4.4)')  myid
    OPEN( 9, FILE='DEBUG'//TRIM( coupling_char )//myid_char, FORM='FORMATTED' )

!
!-- Initialize dvrp logging. Also, one PE maybe split from the global
!-- communicator for doing the dvrp output. In that case, the number of
!-- PEs available for PALM is reduced by one and communicator comm_palm
!-- is changed respectively.
#if defined( __parallel )
    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
!
!-- TEST OUTPUT (TO BE REMOVED)
    WRITE(9,*) '*** coupling_mode = "', TRIM( coupling_mode ), '"'
    CALL LOCAL_FLUSH( 9 )
    IF ( TRIM( coupling_mode ) /= 'uncoupled' )  THEN
       PRINT*, '*** PE', myid, ' Global target PE:', target_id, &
               TRIM( coupling_mode )
    ENDIF
#endif

    CALL init_dvrp_logging

!
!-- Read control parameters from NAMELIST files and read environment-variables
    CALL parin

!
!-- Determine processor topology and local array indices
    CALL init_pegrid

!
!-- Generate grid parameters
    CALL init_grid

!
!-- Check control parameters and deduce further quantities
    CALL check_parameters


!
!-- Initialize all necessary variables
    CALL init_3d_model

!
!-- Output of program header
    IF ( myid == 0 )  CALL header

    CALL cpu_log( log_point(2), 'initialisation', 'stop' )

!
!-- Set start time in format hh:mm:ss
    simulated_time_chr = time_to_string( simulated_time )

!
!-- If required, output of initial arrays
    IF ( do2d_at_begin )  THEN
       CALL data_output_2d( 'xy', 0 )
       CALL data_output_2d( 'xz', 0 )
       CALL data_output_2d( 'yz', 0 )
    ENDIF
    IF ( do3d_at_begin )  THEN
       CALL data_output_3d( 0 )
    ENDIF

!
!-- Declare and initialize variables in the accelerator memory with their
!-- host values. The directive is split into short continuation lines in order
!-- to stay within the free-form line length limit of 132 characters.
    !$acc  data copyin( diss, e, e_p, kh, km, pt, pt_p, q, ql, tend, te_m )    &
    !$acc       copyin( tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p ) &
    !$acc       copyin( ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y )    &
    !$acc       copyin( pt_init, rdf, rdf_sc, ug, vg, zu, zw )                 &
    !$acc       copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst )  &
    !$acc       copyin( us, usws, uswst, vsws, vswst, z0, z0h )                &
    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp )           &
    !$acc       copyin( nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
    !$acc       copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y )       &
    !$acc       copyin( wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 )
!
!-- Integration of the model equations using timestep-scheme
    CALL time_integration

!
!-- If required, write binary data for restart runs
    IF ( write_binary(1:4) == 'true' )  THEN

       CALL cpu_log( log_point(22), 'write_3d_binary', 'start' )

       CALL check_open( 14 )

!
!--    The PEs write in io_blocks groups one after the other. The barrier
!--    makes all PEs wait until the current group has finished.
       DO  i = 0, io_blocks-1
          IF ( i == io_group )  THEN
!
!--          Write flow field data
             CALL write_3d_binary
          ENDIF
#if defined( __parallel )
          CALL MPI_BARRIER( comm2d, ierr )
#endif
       ENDDO

       CALL cpu_log( log_point(22), 'write_3d_binary', 'stop' )

!
!--    If required, write particle data
       IF ( particle_advection )  CALL lpm_write_restart_file
    ENDIF

!
!-- If required, repeat output of header including the required CPU-time
    IF ( myid == 0 )  CALL header
!
!-- If required, final user-defined actions, and
!-- last actions on the open files and close files. Unit 14 was opened
!-- in write_3d_binary but it is closed here, to allow writing on this
!-- unit in routine user_last_actions.
    CALL cpu_log( log_point(4), 'last actions', 'start' )
    DO  i = 0, io_blocks-1
       IF ( i == io_group )  THEN
          CALL user_last_actions
          IF ( write_binary(1:4) == 'true' )  CALL close_file( 14 )
       ENDIF
#if defined( __parallel )
       CALL MPI_BARRIER( comm2d, ierr )
#endif
    ENDDO
    CALL close_file( 0 )
    CALL close_dvrp
    CALL cpu_log( log_point(4), 'last actions', 'stop' )

#if defined( __mpi2 )
!
!-- Test exchange via intercommunicator in case of a MPI-2 coupling
    IF ( coupling_mode == 'atmosphere_to_ocean' )  THEN
       i = 12345 + myid
       CALL MPI_SEND( i, 1, MPI_INTEGER, myid, 11, comm_inter, ierr )
    ELSEIF ( coupling_mode == 'ocean_to_atmosphere' )  THEN
       CALL MPI_RECV( i, 1, MPI_INTEGER, myid, 11, comm_inter, status, ierr )
       PRINT*, '### myid: ', myid, '   received from atmosphere:  i = ', i
    ENDIF
#endif

!
!-- Close the OpenACC data regions: the first directive closes the region
!-- holding the model variables, the second one closes the dummy data region
!-- which has been opened at the beginning of the program
    !$acc end data
    !$acc end data

!
!-- Take final CPU-time for CPU-time analysis
    CALL cpu_log( log_point(1), 'total', 'stop' )
    CALL cpu_statistics

#if defined( __parallel )
    CALL MPI_FINALIZE( ierr )
#endif

END PROGRAM palm
Note: See TracBrowser for help on using the repository browser.