source: palm/trunk/SOURCE/palm.f90 @ 1374

Last change on this file since 1374 was 1374, checked in by raasch, 7 years ago

bugfixes:
missing variables added to ONLY lists in USE statements (advec_s_bc, advec_s_pw, advec_s_up, advec_ws, buoyancy, diffusion_e, diffusion_s, fft_xy, flow_statistics, palm, prognostic_equations)
missing module kinds added (cuda_fft_interfaces)
dpk renamed dp (fft_xy)
missing dependency for check_open added (Makefile)
variables removed from acc-present-list (diffusion_e, diffusion_w, diffusivities, production_e, wall_fluxes)
syntax errors removed from openacc-branch (flow_statistics)
USE-statement for nopointer-case added (swap_timelevel)

  • Property svn:keywords set to Id
File size: 11.5 KB
RevLine 
PROGRAM palm

!--------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the terms
! of the GNU General Public License as published by the Free Software Foundation,
! either version 3 of the License, or (at your option) any later version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2014 Leibniz Universitaet Hannover
!--------------------------------------------------------------------------------!
!
! Current revisions:
! -----------------
! bugfix: module arrays_3d added
!
! Former revisions:
! -----------------
! $Id: palm.f90 1374 2014-04-25 12:55:07Z raasch $
!
! 1320 2014-03-20 08:40:49Z raasch
! ONLY-attribute added to USE-statements,
! kind-parameters added to all INTEGER and REAL declaration statements,
! kinds are defined in new module kinds,
! old module precision_kind is removed,
! revision history before 2012 removed,
! comment fields (!:) to be used for variable explanations added to
! all variable declaration statements
!
! 1318 2014-03-17 13:35:16Z raasch
! module interfaces removed
!
! 1241 2013-10-30 11:36:58Z heinze
! initialization of nudging and large scale forcing from external file
!
! 1221 2013-09-10 08:59:13Z raasch
! +wall_flags_00, rflags_invers, rflags_s_inner in copyin statement
!
! 1212 2013-08-15 08:46:27Z raasch
! +tri in copyin statement
!
! 1179 2013-06-14 05:57:58Z raasch
! ref_state added to copyin-list
!
! 1113 2013-03-10 02:48:14Z raasch
! openACC statements modified
!
! 1111 2013-03-08 23:54:10Z raasch
! openACC statements updated
!
! 1092 2013-02-02 11:24:22Z raasch
! unused variables removed
!
! 1036 2012-10-22 13:43:42Z raasch
! code put under GPL (PALM 3.9)
!
! 1015 2012-09-27 09:23:24Z raasch
! Version number changed from 3.8 to 3.8a.
! OpenACC statements added + code changes required for GPU optimization
!
! 849 2012-03-15 10:35:09Z raasch
! write_particles renamed lpm_write_restart_file
!
! Revision 1.1  1997/07/24 11:23:35  raasch
! Initial revision
!
!
! Description:
! ------------
! Large-Eddy Simulation (LES) model for the convective boundary layer,
! optimized for use on parallel machines (implementation realized using the
! Message Passing Interface (MPI)). The model can also be run on vector machines
! (less well optimized) and workstations. Versions for the different types of
! machines are controlled via cpp-directives.
! Model runs are only feasible using the ksh-script mrun.
!
! Sequence of actions carried out by this main program:
!   1. MPI / coupling / accelerator setup (depending on cpp-switches)
!   2. initialisation (parin, init_pegrid, init_grid, optional nudging and
!      large scale forcing, check_parameters, init_3d_model)
!   3. optional output of the initial state
!   4. time integration (inside the OpenACC data region)
!   5. optional output of restart data, user-defined last actions, closing of
!      files, CPU-time statistics, MPI shutdown
!------------------------------------------------------------------------------!

    USE arrays_3d

    USE control_parameters,                                                    &
        ONLY:  coupling_char, coupling_mode, do2d_at_begin, do3d_at_begin,     &
               io_blocks, io_group, large_scale_forcing, nudging,              &
               simulated_time, simulated_time_chr, version, wall_heatflux,     &
               write_binary

    USE cpulog,                                                                &
        ONLY:  cpu_log, log_point, cpu_statistics

    USE grid_variables,                                                        &
        ONLY:  fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, wall_e_x, wall_e_y, &
               wall_u, wall_v, wall_w_x, wall_w_y

    USE indices,                                                               &
        ONLY:  ngp_2dh, ngp_2dh_s_inner, nzb_diff_s_inner, nzb_diff_s_outer,   &
               nzb_diff_u, nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner,  &
               nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner,             &
               nzb_w_outer, rflags_invers, rflags_s_inner, wall_flags_0,       &
               wall_flags_00

    USE kinds

    USE ls_forcing_mod,                                                        &
        ONLY:  init_ls_forcing

    USE nudge_mod,                                                             &
        ONLY:  init_nudge

    USE particle_attributes,                                                   &
        ONLY:  particle_advection

    USE pegrid

    USE statistics,                                                            &
        ONLY:  hom, rmask, weight_pres, weight_substep

#if defined( __openacc )
    USE OPENACC
#endif

    IMPLICIT NONE

!
!-- Local variables
    CHARACTER(LEN=9) ::  time_to_string  !: result type of the external function
                                         !: used to format simulated_time
    INTEGER(iwp)     ::  i               !: loop index over the I/O blocks (also
                                         !: test value of the MPI-2 exchange)
#if defined( __openacc )
    REAL(wp), DIMENSION(100) ::  acc_dum     !: dummy array, only copied to the
                                             !: device to attach it
#endif

    version = 'PALM 3.10'

#if defined( __parallel )
!
!-- MPI initialisation. comm2d is preliminary set, because
!-- it will be defined in init_pegrid but is used before in cpu_log.
    CALL MPI_INIT( ierr )
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
    comm_palm = MPI_COMM_WORLD
    comm2d    = MPI_COMM_WORLD

!
!-- Initialize PE topology in case of coupled runs
    CALL init_coupling
#endif

#if defined( __openacc )
!
!-- Get the number of accelerator boards per node and assign the MPI processes
!-- to these boards
    PRINT*, '*** ACC_DEVICE_NVIDIA = ', ACC_DEVICE_NVIDIA
    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
    IF ( numprocs == 1  .AND.  num_acc_per_node > 0 )  num_acc_per_node = 1
    PRINT*, '*** myid = ', myid, ' num_acc_per_node = ', num_acc_per_node
    IF ( num_acc_per_node > 0 )  THEN
       acc_rank = MOD( myid, num_acc_per_node )
       CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
    ELSE
!
!--    MOD with a zero divisor is undefined, so no board can be assigned here.
!--    No device is selected explicitly in this case.
       acc_rank = 0
       PRINT*, '*** myid = ', myid, ' no accelerator device found,',          &
               ' no device selected'
    ENDIF
!
!-- Test output (to be removed later)
    WRITE (*,'(A,I4,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',&
                                      acc_rank, ' Devices: ', num_acc_per_node,&
                                      ' connected to:',                        &
                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
!
!-- Give the dummy array defined values before it is copied to the device
    acc_dum = 0.0_wp
#endif

!
!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
!-- region. This region stays open until the end of the program (see the
!-- "end data" directives below).
    !$acc data copyin( acc_dum )

!
!-- Initialize measuring of the CPU-time remaining to the run
    CALL local_tremain_ini

!
!-- Start of total CPU time measuring.
    CALL cpu_log( log_point(1), 'total', 'start' )
    CALL cpu_log( log_point(2), 'initialisation', 'start' )

!
!-- Open a file for debug output
    WRITE (myid_char,'(''_'',I4.4)')  myid
    OPEN( 9, FILE='DEBUG'//TRIM( coupling_char )//myid_char, FORM='FORMATTED' )

!
!-- Initialize dvrp logging. Also, one PE may be split from the global
!-- communicator for doing the dvrp output. In that case, the number of
!-- PEs available for PALM is reduced by one and communicator comm_palm
!-- is changed respectively.
#if defined( __parallel )
    CALL MPI_COMM_RANK( comm_palm, myid, ierr )
!
!-- TEST OUTPUT (TO BE REMOVED)
    WRITE(9,*) '*** coupling_mode = "', TRIM( coupling_mode ), '"'
    CALL LOCAL_FLUSH( 9 )
    IF ( TRIM( coupling_mode ) /= 'uncoupled' )  THEN
       PRINT*, '*** PE', myid, ' Global target PE:', target_id, &
               TRIM( coupling_mode )
    ENDIF
#endif

    CALL init_dvrp_logging

!
!-- Read control parameters from NAMELIST files and read environment-variables
    CALL parin

!
!-- Determine processor topology and local array indices
    CALL init_pegrid

!
!-- Generate grid parameters
    CALL init_grid

!
!-- Initialize nudging if required
    IF ( nudging )  THEN
       CALL init_nudge
    ENDIF

!
!-- Initialize reading of large scale forcing from external file - if required
    IF ( large_scale_forcing )  THEN
       CALL init_ls_forcing
    ENDIF

!
!-- Check control parameters and deduce further quantities
    CALL check_parameters

!
!-- Initialize all necessary variables
    CALL init_3d_model

!
!-- Output of program header
    IF ( myid == 0 )  CALL header

    CALL cpu_log( log_point(2), 'initialisation', 'stop' )

!
!-- Set start time in format hh:mm:ss
    simulated_time_chr = time_to_string( simulated_time )

!
!-- If required, output of initial arrays
    IF ( do2d_at_begin )  THEN
       CALL data_output_2d( 'xy', 0 )
       CALL data_output_2d( 'xz', 0 )
       CALL data_output_2d( 'yz', 0 )
    ENDIF
    IF ( do3d_at_begin )  THEN
       CALL data_output_3d( 0 )
    ENDIF

!
!-- Declare and initialize variables in the accelerator memory with their
!-- host values. The copyin-list is split into several clauses only to keep
!-- the directive lines short.
    !$acc  data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend )    &
    !$acc       copyin( te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p )   &
    !$acc       copyin( w, w_p )                                               &
    !$acc       copyin( tri, tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall )    &
    !$acc       copyin( ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ref_state )      &
    !$acc       copyin( ug, u_init, vg, v_init, zu, zw )                       &
    !$acc       copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst )  &
    !$acc       copyin( us, usws, uswst, vsws, vswst, z0, z0h )                &
    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp )           &
    !$acc       copyin( nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
    !$acc       copyin( nzb_w_outer, rflags_invers, rflags_s_inner, rmask )    &
    !$acc       copyin( wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v )    &
    !$acc       copyin( wall_w_x, wall_w_y, wall_flags_0, wall_flags_00 )      &
    !$acc       copyin( ngp_2dh, ngp_2dh_s_inner )                             &
    !$acc       copyin( weight_pres, weight_substep )
!
!-- Integration of the model equations using timestep-scheme
    CALL time_integration

!
!-- If required, write binary data for restart runs
    IF ( write_binary(1:4) == 'true' )  THEN

       CALL cpu_log( log_point(22), 'write_3d_binary', 'start' )

       CALL check_open( 14 )

!
!--    The PEs write in groups, one group (io_group) per loop pass
       DO  i = 0, io_blocks-1
          IF ( i == io_group )  THEN
!
!--          Write flow field data
             CALL write_3d_binary
          ENDIF
#if defined( __parallel )
          CALL MPI_BARRIER( comm2d, ierr )
#endif
       ENDDO

       CALL cpu_log( log_point(22), 'write_3d_binary', 'stop' )

!
!--    If required, write particle data
       IF ( particle_advection )  CALL lpm_write_restart_file
    ENDIF

!
!-- If required, repeat output of header including the required CPU-time
    IF ( myid == 0 )  CALL header
!
!-- If required, final user-defined actions, and
!-- last actions on the open files and close files. Unit 14 was opened
!-- in write_3d_binary but it is closed here, to allow writing on this
!-- unit in routine user_last_actions.
    CALL cpu_log( log_point(4), 'last actions', 'start' )
    DO  i = 0, io_blocks-1
       IF ( i == io_group )  THEN
          CALL user_last_actions
          IF ( write_binary(1:4) == 'true' )  CALL close_file( 14 )
       ENDIF
#if defined( __parallel )
       CALL MPI_BARRIER( comm2d, ierr )
#endif
    ENDDO
    CALL close_file( 0 )
    CALL close_dvrp
    CALL cpu_log( log_point(4), 'last actions', 'stop' )

#if defined( __mpi2 )
!
!-- Test exchange via intercommunicator in case of a MPI-2 coupling
    IF ( coupling_mode == 'atmosphere_to_ocean' )  THEN
       i = 12345 + myid
       CALL MPI_SEND( i, 1, MPI_INTEGER, myid, 11, comm_inter, ierr )
    ELSEIF ( coupling_mode == 'ocean_to_atmosphere' )  THEN
       CALL MPI_RECV( i, 1, MPI_INTEGER, myid, 11, comm_inter, status, ierr )
       PRINT*, '### myid: ', myid, '   received from atmosphere:  i = ', i
    ENDIF
#endif

!
!-- Close both OpenACC data regions: first the region holding the model
!-- arrays, then the dummy data region opened at the start of the program
    !$acc end data
    !$acc end data

!
!-- Take final CPU-time for CPU-time analysis
    CALL cpu_log( log_point(1), 'total', 'stop' )
    CALL cpu_statistics

#if defined( __parallel )
    CALL MPI_FINALIZE( ierr )
#endif

END PROGRAM palm
Note: See TracBrowser for help on using the repository browser.