Changeset 4893 for palm/trunk/SOURCE
- Timestamp: Mar 2, 2021 4:39:14 PM
- Location: palm/trunk/SOURCE
- Files: 9 edited
palm/trunk/SOURCE/init_pegrid.f90
r4848 → r4893 (a superfluous blank line ahead of the comment block is removed)

    358   358        CALL MPI_COMM_RANK( comm1dy, myidy, ierr )
    359   359
    360              (blank line removed)
    361   360   !
    362   361   !-- Calculate array bounds along x-direction for every PE.
palm/trunk/SOURCE/land_surface_model_mod.f90
r4876 r4893 24 24 ! ----------------- 25 25 ! $Id$ 26 ! revised output of surface data via MPI-IO for better performance 27 ! 28 ! 4876 2021-02-17 12:27:36Z raasch 26 29 ! bugfix for instantaneous c_liq output 27 30 ! … … 6249 6252 INTEGER(iwp) :: l !< index variable for surface orientation 6250 6253 6251 INTEGER(iwp),DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< index for surface data (MPI-IO) 6254 INTEGER(iwp),DIMENSION(nys:nyn,nxl:nxr) :: global_end_index !< end index for surface data (MPI-IO) 6255 INTEGER(iwp),DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< start index for surface data (MPI-IO) 6252 6256 6253 6257 LOGICAL :: surface_data_to_write !< switch for MPI-I/O if PE has surface data to write … … 6435 6439 6436 6440 CALL rd_mpi_io_surface_filetypes( surf_lsm_h(l)%start_index, surf_lsm_h(l)%end_index, & 6437 surface_data_to_write, global_start_index ) 6438 6439 CALL wrd_mpi_io( 'lsm_start_index_h_' // dum, surf_lsm_h(l)%start_index ) 6440 CALL wrd_mpi_io( 'lsm_end_index_h_' // dum, surf_lsm_h(l)%end_index ) 6441 surface_data_to_write, global_start_index, & 6442 global_end_index ) 6443 6441 6444 CALL wrd_mpi_io( 'lsm_global_start_index_h_' // dum, global_start_index ) 6445 CALL wrd_mpi_io( 'lsm_global_end_index_h_' // dum, global_end_index ) 6442 6446 6443 6447 IF ( .NOT. surface_data_to_write ) CYCLE 6444 6448 6445 6449 CALL wrd_mpi_io_surface( 't_soil_h(' // dum // ')', t_soil_h(l)%var_2d ) 6446 CALL wrd_mpi_io_surface( 'm_soil_h(' // dum // ')', 6450 CALL wrd_mpi_io_surface( 'm_soil_h(' // dum // ')', m_soil_h(l)%var_2d ) 6447 6451 CALL wrd_mpi_io_surface( 'm_liq_h(' // dum // ')', m_liq_h(l)%var_1d ) 6448 6452 CALL wrd_mpi_io_surface( 't_surface_h(' // dum // ')', t_surface_h(l)%var_1d ) … … 6454 6458 6455 6459 CALL rd_mpi_io_surface_filetypes ( surf_lsm_v(l)%start_index, surf_lsm_v(l)%end_index, & 6456 surface_data_to_write, global_start_index )6457 6458 CALL wrd_mpi_io( 'lsm_start_index_v_' // dum, surf_lsm_v(l)%start_index ) 6459 CALL wrd_mpi_io( 'lsm_ end_index_v_' // dum, surf_lsm_v(l)%end_index )6460 CALL wrd_mpi_io( 'lsm_global_ start_index_v_' // dum , global_start_index )6460 surface_data_to_write, global_start_index, & 6461 global_end_index ) 6462 6463 CALL wrd_mpi_io( 'lsm_global_start_index_v_' // dum, global_start_index ) 6464 CALL wrd_mpi_io( 'lsm_global_end_index_v_' // dum, global_end_index ) 6461 6465 6462 6466 IF ( .NOT. surface_data_to_write ) CYCLE … … 7133 7137 INTEGER(iwp) :: l !< running index surface orientation 7134 7138 7135 !INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: end_index 7136 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start 7137 !INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: start_index 7138 7139 LOGICAL :: array_found 7140 LOGICAL :: ldum 7139 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_end_index 7140 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start_index 7141 7142 LOGICAL :: array_found 7143 LOGICAL :: data_to_read !< switch to steer reading of data 7141 7144 7142 7145 … … 7205 7208 WRITE( dum, '(I1)') l 7206 7209 7207 CALL rrd_mpi_io( 'lsm_start_index_h_' // dum, surf_lsm_h(l)%start_index ) 7208 CALL rrd_mpi_io( 'lsm_end_index_h_' // dum, surf_lsm_h(l)%end_index ) 7209 CALL rrd_mpi_io( 'lsm_global_start_index_h_' // dum, global_start ) 7210 7211 CALL rd_mpi_io_surface_filetypes ( surf_lsm_h(l)%start_index, surf_lsm_h(l)%end_index, ldum,& 7212 global_start ) 7213 7214 IF ( MAXVAL( surf_lsm_h(l)%end_index ) <= 0 ) CYCLE 7210 ! 
7211 !-- surf_lsm_h(l)%start_index and surf_lsm_h(l)%end_index are already set and should not be read 7212 !-- from restart file. 7213 CALL rrd_mpi_io( 'lsm_global_start_index_h_' // dum, global_start_index ) 7214 CALL rrd_mpi_io( 'lsm_global_end_index_h_' // dum, global_end_index ) 7215 7216 CALL rd_mpi_io_surface_filetypes ( surf_lsm_h(l)%start_index, surf_lsm_h(l)%end_index, & 7217 data_to_read, global_start_index, global_end_index ) 7218 IF ( .NOT. data_to_read ) CYCLE 7215 7219 7216 7220 CALL rrd_mpi_io_surface( 't_soil_h(' // dum // ')', t_soil_h(l)%var_2d ) … … 7218 7222 CALL rrd_mpi_io_surface( 'm_liq_h(' // dum // ')', m_liq_h(l)%var_1d ) 7219 7223 CALL rrd_mpi_io_surface( 't_surface_h(' // dum // ')', t_surface_h(l)%var_1d ) 7224 7220 7225 ENDDO 7221 7226 … … 7224 7229 WRITE( dum, '(I1)') l 7225 7230 7226 !kk In case of nothing to do, the settings of start_index and end_index differ 7227 !kk between writing and reading restart file 7228 !kk 7229 !kk Has to be discussed with the developers 7230 7231 CALL rrd_mpi_io( 'lsm_start_index_v_' // dum, surf_lsm_v(l)%start_index ) 7232 CALL rrd_mpi_io( 'lsm_end_index_v_' // dum, surf_lsm_v(l)%end_index ) 7233 CALL rrd_mpi_io( 'lsm_global_start_index_v_' // dum , global_start ) 7234 7235 CALL rd_mpi_io_surface_filetypes( surf_lsm_v(l)%start_index, surf_lsm_v(l)%end_index, ldum, & 7236 global_start ) 7237 7238 IF ( MAXVAL( surf_lsm_v(l)%end_index ) <= 0 ) CYCLE 7231 ! 7232 !-- surf_lsm_v(l)%start_index and surf_lsm_v(l)%end_index are already set and should not be read 7233 !-- from restart file. 7234 CALL rrd_mpi_io( 'lsm_global_start_index_v_' // dum , global_start_index ) 7235 CALL rrd_mpi_io( 'lsm_global_end_index_v_' // dum , global_end_index ) 7236 7237 CALL rd_mpi_io_surface_filetypes( surf_lsm_v(l)%start_index, surf_lsm_v(l)%end_index, & 7238 data_to_read, global_start_index, global_end_index ) 7239 IF ( .NOT. data_to_read ) CYCLE 7239 7240 7240 7241 CALL rrd_mpi_io_surface( 't_soil_v(' // dum // ')', t_soil_v(l)%var_2d ) -
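The hunks above switch the land-surface module to the revised surface I/O interface: rd_mpi_io_surface_filetypes now also returns a global end index, only the two global index arrays are written to the restart file, and the local start/end indices are no longer read back. The following is a minimal sketch of that calling pattern for a single horizontal surface type; it is not part of the changeset, and the kind (iwp), the grid bounds (nys, nyn, nxl, nxr) and the surf/surf_var structures stand in for the host module's own variables.

 SUBROUTINE surface_restart_io_example( write_phase )

    USE restart_data_mpi_io_mod,                                                                   &
        ONLY:  rd_mpi_io_surface_filetypes, rrd_mpi_io, rrd_mpi_io_surface, wrd_mpi_io,            &
               wrd_mpi_io_surface

    IMPLICIT NONE

    LOGICAL, INTENT(IN) ::  write_phase  !< .TRUE. in the *_wrd_local phase, .FALSE. in *_rrd_local

    INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) ::  global_end_index    !< end index for surface data (MPI-IO)
    INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) ::  global_start_index  !< start index for surface data (MPI-IO)

    LOGICAL ::  surface_data_to_io  !< switch for MPI-I/O if PE has surface data to write/read

    IF ( write_phase )  THEN
 !
 !--   Create the surface filetypes from the local index arrays; the revised routine additionally
 !--   returns the global end index.
       CALL rd_mpi_io_surface_filetypes( surf%start_index, surf%end_index, surface_data_to_io,     &
                                         global_start_index, global_end_index )
 !
 !--   Only the global index arrays go into the restart file; the local start/end indices are not
 !--   written any more.
       CALL wrd_mpi_io( 'example_global_start_index_h_0', global_start_index )
       CALL wrd_mpi_io( 'example_global_end_index_h_0', global_end_index )

       IF ( surface_data_to_io )  THEN
          CALL wrd_mpi_io_surface( 'example_var_h(0)', surf_var%var_1d )
       ENDIF

    ELSE
 !
 !--   surf%start_index and surf%end_index are already set by the model and are not read from the
 !--   restart file.
       CALL rrd_mpi_io( 'example_global_start_index_h_0', global_start_index )
       CALL rrd_mpi_io( 'example_global_end_index_h_0', global_end_index )

       CALL rd_mpi_io_surface_filetypes( surf%start_index, surf%end_index, surface_data_to_io,     &
                                         global_start_index, global_end_index )

       IF ( surface_data_to_io )  THEN
          CALL rrd_mpi_io_surface( 'example_var_h(0)', surf_var%var_1d )
       ENDIF

    ENDIF

 END SUBROUTINE surface_restart_io_example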
palm/trunk/SOURCE/read_restart_data_mod.f90
r4848 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance, 28 ! therefore binary version number has changed 29 ! 30 ! 4848 2021-01-21 15:51:51Z gronemeier 27 31 ! bugfix: removed syn_turb_gen from restart files 28 32 ! … … 276 280 CALL location_message( 'read global restart data', 'start' ) 277 281 282 ! 283 !-- Caution: When any of the read instructions have been changed, the 284 !-- ------- version number stored in the variable binary_version_global has 285 !-- to be increased. The same changes must also be done in wrd_write_global. 286 binary_version_global = '5.3' 287 278 288 IF ( TRIM( restart_data_format_input ) == 'fortran_binary' ) THEN 279 289 ! … … 286 296 READ ( 13 ) version_on_file 287 297 288 binary_version_global = '5.2'289 298 IF ( TRIM( version_on_file ) /= TRIM( binary_version_global ) ) THEN 290 299 WRITE( message_string, * ) 'version mismatch concerning ', & … … 373 382 ! 374 383 !-- Now read all control parameters: 375 !-- Caution: When the following read instructions have been changed, the 376 !-- ------- version number stored in the variable binary_version_global has 377 !-- to be increased. The same changes must also be done in 378 !-- wrd_write_global. 384 379 385 READ ( 13 ) length 380 386 READ ( 13 ) restart_string(1:length) … … 873 879 !-- Read global restart data using MPI-IO 874 880 !-- ATTENTION: Arrays need to be read with routine rrd_mpi_io_global_array! 875 !-- Caution: When any of the following read instructions have been changed, the 876 !-- ------- version number stored in the variable binary_version_global has 877 !-- to be increased. The same changes must also be done in 878 !-- wrd_write_global. 881 879 882 ! 880 883 !-- Open the MPI-IO restart file. … … 886 889 CALL rrd_mpi_io( 'binary_version_global', version_on_file ) 887 890 888 binary_version_global = '5.1'889 891 IF ( TRIM( version_on_file ) /= TRIM( binary_version_global ) ) THEN 890 892 WRITE( message_string, * ) 'version mismatch concerning binary_version_global:', & -
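In read_restart_data_mod.f90 the binary version number is raised to '5.3' and is now set once, before the format branch, instead of separately inside the Fortran-binary ('5.2') and MPI-IO ('5.1') branches. The following is a condensed sketch of the resulting check, assembled from the hunks below; file opening is omitted and the error-message call is abbreviated.

 !
 !-- Caution: When any of the read instructions are changed, the version number stored in
 !-- binary_version_global has to be increased. The same change must also be made in
 !-- wrd_write_global.
    binary_version_global = '5.3'

    IF ( TRIM( restart_data_format_input ) == 'fortran_binary' )  THEN
       READ ( 13 )  version_on_file
    ELSE
 !
 !--   MPI-IO restart file
       CALL rrd_mpi_io( 'binary_version_global', version_on_file )
    ENDIF

    IF ( TRIM( version_on_file ) /= TRIM( binary_version_global ) )  THEN
       WRITE( message_string, * ) 'version mismatch concerning binary_version_global: ',           &
                                  TRIM( version_on_file ), ' /= ', TRIM( binary_version_global )
 !
 !--   Abort via CALL message( ... ) as in the original code
    ENDIF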
palm/trunk/SOURCE/restart_data_mpi_io_mod.f90
r4857 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance 28 ! 29 ! 4857 2021-01-26 07:24:41Z raasch 27 30 ! bugfix: allocation of 3d-int4 array moved from particle output to standard output 28 31 ! … … 155 158 myidx, & 156 159 myidy, & 157 npex, &158 npey, &159 160 numprocs, & 160 161 pdims … … 183 184 INTEGER(iwp) :: fh = -1 !< MPI-IO file handle 184 185 #if defined( __parallel ) 185 INTEGER(iwp) :: fhs = -1 !< MPI-IO file handle to open file with comm2d always186 #endif187 186 INTEGER(iwp) :: ft_surf = -1 !< MPI filetype surface data 188 #if defined( __parallel )189 187 INTEGER(iwp) :: ft_2di_nb !< MPI filetype 2D array INTEGER no outer boundary 190 188 INTEGER(iwp) :: ft_2d !< MPI filetype 2D array REAL with outer boundaries … … 196 194 INTEGER(iwp) :: glo_start !< global start index on this PE 197 195 #if defined( __parallel ) 198 INTEGER(iwp) :: local_start !<199 #endif200 INTEGER(iwp) :: nr_iope !<201 INTEGER(iwp) :: nr_val !< local number of values in x and y direction202 #if defined( __parallel )203 196 INTEGER(iwp) :: win_2di !< 204 197 INTEGER(iwp) :: win_2dr !< … … 207 200 INTEGER(iwp) :: win_3dr !< 208 201 INTEGER(iwp) :: win_3ds !< 202 INTEGER(iwp) :: win_end = -1 !< 203 INTEGER(iwp) :: win_glost = -1 !< 204 INTEGER(iwp) :: win_out = -1 !< 205 INTEGER(iwp) :: win_start = -1 !< 209 206 INTEGER(iwp) :: win_surf = -1 !< 210 207 #endif … … 216 213 INTEGER(iwp), DIMENSION(:,:), POINTER, CONTIGUOUS :: array_2di !< 217 214 218 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_end_index !< 219 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_global_start !< 215 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_end_index !< extended end index, every grid cell has at least one value 216 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_start_index !< 217 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_end_index !< module copy of end_index 220 218 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_start_index !< 219 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: thread_index !< 220 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: target_thread !< 221 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: transfer_index !< 222 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: thread_values !< 223 ! 
224 !-- Indices for cyclic fill 225 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: o_start_index !< 226 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_start_index !< 227 !#if defined( __parallel ) 228 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: o_end_index !< extended end index, every grid cell has at least one value 229 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_end_index !< extended end index, every grid cell has at least one value 230 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_global_start !< 231 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_global_end !< 232 !#endif 221 233 222 234 INTEGER(isp), DIMENSION(:,:,:), POINTER, CONTIGUOUS :: array_3di4 !< 223 235 INTEGER(idp), DIMENSION(:,:,:), POINTER, CONTIGUOUS :: array_3di8 !< 224 236 225 LOGICAL :: all_pes_write !< all PEs have data to write226 237 LOGICAL :: filetypes_created !< 227 238 LOGICAL :: io_on_limited_cores_per_node !< switch to shared memory MPI-IO … … 229 240 LOGICAL :: wr_flag !< file is opened for write 230 241 242 #if defined( __parallel ) 243 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: local_indices 244 #endif 245 246 REAL(wp), DIMENSION(:), POINTER, CONTIGUOUS :: array_out !< 231 247 #if defined( __parallel ) 232 248 REAL(wp), DIMENSION(:), POINTER, CONTIGUOUS :: array_1d !< … … 251 267 INTEGER(iwp) :: nr_int !< number of INTEGER entries in header 252 268 INTEGER(iwp) :: nr_real !< number of REAL entries in header 269 INTEGER(iwp) :: pes_along_x !< number of PEs along x-direction during writing restart file 270 INTEGER(iwp) :: pes_along_y !< number of PEs along y-direction during writing restart file 253 271 INTEGER(iwp) :: total_nx !< total number of points in x-direction 254 272 INTEGER(iwp) :: total_ny !< total number of points in y-direction 255 273 END TYPE general_header 256 274 257 TYPE(general_header), TARGET :: tgh !<275 TYPE(general_header), TARGET, PUBLIC :: tgh !< 258 276 259 277 TYPE(sm_class) :: sm_io !< … … 421 439 wrd_mpi_io_surface 422 440 423 424 441 CONTAINS 425 442 … … 452 469 TYPE(C_PTR) :: buf_ptr !< 453 470 #endif 471 454 472 455 473 offset = 0 … … 467 485 io_file_name = file_name 468 486 ! 469 !-- Setup for IO on a limited number of threads per node (using shared memory MPI)487 !-- Setup for IO on a limited number of PEs per node (using shared memory MPI) 470 488 IF ( rd_flag ) THEN 471 489 set_filetype = .TRUE. … … 822 840 823 841 ! 824 !-- TODO: describe in more detail what is done here and why it is done825 !-- save grid of main run842 !-- Save grid information of the mainrun, i.e. grid variables like nxl, nxr, nys, nyn and other 843 !-- values are stored within the mainrun_grid structure 826 844 CALL mainrun_grid%save_grid_into_this_class() 827 845 … … 834 852 rma_offset_s = 0 835 853 ! 836 !-- Determine, if gridpoints of the prerun are located on this thread.854 !-- Determine, if gridpoints of the prerun are located on this PE. 837 855 !-- Set the (cyclic) prerun grid. 838 856 nxr = MIN( nxr, nx_on_file ) … … 857 875 ny = ny_on_file 858 876 ! 859 !-- Determine, if this threadis doing IO877 !-- Determine, if this PE is doing IO 860 878 IF ( nnx > 0 .AND. nny > 0 ) THEN 861 879 color = 1 … … 892 910 #endif 893 911 ! 894 !-- Allocate 2d buffers as RMA window, accessible on all threads912 !-- Allocate 2d buffers as RMA window, accessible on all PEs 895 913 IF ( pe_active_for_read ) THEN 896 914 ALLOCATE( rmabuf_2di(nys:nyn,nxl:nxr) ) … … 918 936 919 937 ! 
920 !-- Allocate 3d buffer as RMA window, accessable on all threads938 !-- Allocate 3d buffer as RMA window, accessable on all PEs 921 939 IF ( pe_active_for_read ) THEN 922 940 ALLOCATE( rmabuf_3d(nzb:nzt+1,nys:nyn,nxl:nxr) ) … … 932 950 933 951 ! 934 !-- TODO: comment in more detail, what is done here, and why 935 !-- save small grid 952 !-- Save grid of the prerun, i.e. grid variables like nxl, nxr, nys, nyn and other values 953 !-- are stored within the prerun_grid structure. 954 !-- The prerun grid can later be activated by calling prerun_grid%activate_grid_from_this_class() 936 955 CALL prerun_grid%save_grid_into_this_class() 937 956 prerun_grid%comm2d = comm_cyclic_fill … … 1152 1171 1153 1172 1154 !kk write(9,*) 'Here is rma_cylic_fill_real_2d ',nxl,nxr,nys,nyn; FLUSH(9)1155 1156 1173 ! 1157 1174 !-- Reading 2d real array on prerun grid … … 1297 1314 !-- array would be dimensioned in the caller subroutine like this: 1298 1315 !-- INTEGER, DIMENSION(nysg:nyng,nxlg:nxrg):: data 1299 message_string = '2d-INTEGER array "' // TRIM( name ) // '" to be read from restart ' //&1300 'f ile is defined with illegal dimensions in the PALM code'1316 message_string = '2d-INTEGER array with nbgp "' // TRIM( name ) // '" to be read ' // & 1317 'from restart file is defined with illegal dimensions in the PALM code' 1301 1318 CALL message( 'rrd_mpi_io_int_2d', 'PA0723', 3, 2, 0, 6, 0 ) 1302 1319 … … 1374 1391 1375 1392 1376 CALL prerun_grid%activate_grid_from_this_class()1377 1378 1393 IF ( pe_active_for_read ) THEN 1394 CALL prerun_grid%activate_grid_from_this_class() 1395 1379 1396 #if defined( __parallel ) 1380 1397 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_INTEGER, ft_2di_nb, 'native', & 1381 1398 MPI_INFO_NULL, ierr ) 1382 1399 CALL MPI_FILE_READ_ALL( fh, array_2di, SIZE( array_2di ), MPI_INTEGER, status, ierr ) 1400 #else 1401 CALL posix_lseek( fh, array_position ) 1402 CALL posix_read( fh, array_2di, SIZE( array_2di ) ) 1383 1403 #endif 1384 1404 DO i = nxl, nxr … … 1386 1406 ENDDO 1387 1407 data(1:nny,1:nnx) = rmabuf_2di 1388 ENDIF 1389 1390 CALL mainrun_grid%activate_grid_from_this_class()1408 1409 CALL mainrun_grid%activate_grid_from_this_class() 1410 ENDIF 1391 1411 1392 1412 #if defined( __parallel ) … … 1396 1416 #endif 1397 1417 1398 IF ( .NOT. pe_active_for_read ) THEN 1399 1400 is = nxl 1401 ie = nxr 1402 js = nys 1403 je = nyn 1404 1405 ELSE 1406 1407 is = nxl 1408 ie = nxr 1409 js = prerun_grid%nys+1 1410 je = nyn 1411 DO i = is, ie 1412 DO j = js, je 1413 i_remote = MOD(i,nx_on_file+1) 1414 j_remote = MOD(j,ny_on_file+1) 1415 rem_pe = remote_pe(i_remote,j_remote) 1416 rem_offs = rma_offset(i_remote,j_remote) 1417 nval = 1 1418 1419 #if defined( __parallel ) 1420 IF ( rem_pe /= myid ) THEN 1421 CALL MPI_GET( data(j-nys+1,i-nxl+1), nval, MPI_INTEGER, rem_pe, rem_offs, nval, & 1422 MPI_INTEGER, rmawin_2di, ierr ) 1423 ELSE 1424 data(j-nys+1,i-nxl+1) = rmabuf_2di(j_remote,i_remote) 1425 ENDIF 1426 #else 1427 data(j-nys+1,i-nxl+1) = array_2di(i_remote,j_remote) 1428 #endif 1429 ENDDO 1430 ENDDO 1431 is = prerun_grid%nxr+1 1432 ie = nxr 1433 js = nys 1434 je = nyn 1435 1436 ENDIF 1418 is = nxl 1419 ie = nxr 1420 js = nys 1421 je = nyn 1437 1422 1438 1423 DO i = is, ie … … 1717 1702 ierr ) 1718 1703 CALL MPI_FILE_READ_ALL( fh, array_3d, SIZE( array_3d ), MPI_REAL, status, ierr ) 1704 #else 1705 CALL posix_lseek( fh, array_position ) 1706 CALL posix_read( fh, array_3d, SIZE( array_3d ) ) 1719 1707 #endif 1720 1708 DO i = nxl, nxr … … 1727 1715 #if defined( __parallel ) 1728 1716 ! 
1729 !-- Close RMA window to allow remote access 1730 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 1731 #endif 1732 1733 IF ( .NOT. pe_active_for_read ) THEN 1734 1735 is = nxl 1736 ie = nxr 1737 js = nys 1738 je = nyn 1739 1740 ELSE 1741 1742 is = nxl 1743 ie = nxr 1744 js = prerun_grid%nys+1 1745 je = nyn 1746 1747 DO i = is, ie 1748 DO j = js, je 1749 i_remote = MOD(i,nx_on_file+1) 1750 j_remote = MOD(j,ny_on_file+1) 1751 rem_pe = remote_pe(i_remote,j_remote) 1752 rem_offs = rma_offset(i_remote,j_remote)*(nzt-nzb+2) 1753 nval = nzt-nzb+2 1754 1755 #if defined( __parallel ) 1756 IF(rem_pe /= myid) THEN 1757 CALL MPI_GET( data(nzb,j,i), nval, MPI_REAL, rem_pe, rem_offs, nval, MPI_REAL, & 1758 rmawin_3d, ierr) 1759 ELSE 1760 data(:,j,i) = rmabuf_3d(:,j_remote,i_remote) 1761 ENDIF 1762 #else 1763 data(:,j,i) = array_3d(:,i_remote,j_remote) 1764 #endif 1765 ENDDO 1766 ENDDO 1767 is = prerun_grid%nxr+1 1768 ie = nxr 1769 js = nys 1770 je = nyn 1771 1772 ENDIF 1717 !-- Close RMA window to allow remote access 1718 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 1719 #endif 1720 1721 is = nxl 1722 ie = nxr 1723 js = nys 1724 je = nyn 1773 1725 1774 1726 DO i = is, ie 1775 1727 DO j = js, je 1776 i_remote = MOD( i,nx_on_file+1)1777 j_remote = MOD( j,ny_on_file+1)1728 i_remote = MOD( i, nx_on_file+1 ) 1729 j_remote = MOD( j, ny_on_file+1 ) 1778 1730 rem_pe = remote_pe(i_remote,j_remote) 1779 1731 rem_offs = rma_offset(i_remote,j_remote) * ( nzt-nzb+2 ) … … 1850 1802 1851 1803 IF ( found ) THEN 1852 #if defined( __parallel )1853 1804 CALL rd_mpi_io_create_filetypes_3dsoil( nzb_soil, nzt_soil ) 1805 #if defined( __parallel ) 1854 1806 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 1855 1807 IF ( sm_io%iam_io_pe ) THEN … … 1874 1826 ENDIF 1875 1827 1828 #if defined( __parallel ) 1829 IF ( sm_io%is_sm_active() ) THEN 1830 CALL MPI_WIN_FREE( win_3ds, ierr ) 1831 ELSE 1832 DEALLOCATE( array_3d_soil ) 1833 ENDIF 1834 #else 1835 DEALLOCATE( array_3d_soil ) 1836 #endif 1837 1876 1838 ELSE 1877 1839 … … 2042 2004 2043 2005 IF ( header_array_index == max_nr_arrays ) THEN 2044 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2006 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2007 CALL message( 'wrd_mpi_io_real_2d', 'PA0585', 1, 2, 0, 6, 0 ) 2045 2008 ENDIF 2046 2009 … … 2107 2070 2108 2071 IF ( header_array_index == max_nr_arrays ) THEN 2109 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2072 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2073 CALL message( 'wrd_mpi_io_int_2d', 'PA0585', 1, 2, 0, 6, 0 ) 2110 2074 ENDIF 2111 2075 … … 2182 2146 2183 2147 IF ( header_array_index == max_nr_arrays ) THEN 2184 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2148 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2149 CALL message( 'wrd_mpi_io_int4_3d', 'PA0585', 1, 2, 0, 6, 0 ) 2185 2150 ENDIF 2186 2151 … … 2225 2190 INT( (iog%nx+1), KIND = rd_offset_kind ) * isp 2226 2191 2227 write(9,*) 'array_position int4_3d ',trim(name),' ',array_position2228 2229 2192 END SUBROUTINE wrd_mpi_io_int4_3d 2230 2193 … … 2250 2213 2251 2214 IF ( header_array_index == max_nr_arrays ) THEN 2252 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2215 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2216 CALL 
message( 'wrd_mpi_io_int8_3d', 'PA0585', 1, 2, 0, 6, 0 ) 2253 2217 ENDIF 2254 2218 … … 2293 2257 INT( (iog%nx+1), KIND = rd_offset_kind ) * dp 2294 2258 2295 write(9,*) 'array_position int8_3d ',trim(name),' ',array_position2296 2297 2259 END SUBROUTINE wrd_mpi_io_int8_3d 2298 2260 … … 2318 2280 2319 2281 IF ( header_array_index == max_nr_arrays ) THEN 2320 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2282 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2283 CALL message( 'wrd_mpi_io_real_3d', 'PA0585', 1, 2, 0, 6, 0 ) 2321 2284 ENDIF 2322 2285 … … 2395 2358 2396 2359 IF ( header_array_index == max_nr_arrays ) THEN 2397 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2360 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2361 CALL message( 'wrd_mpi_io_real_3d_soil', 'PA0585', 1, 2, 0, 6, 0 ) 2398 2362 ENDIF 2399 2363 … … 2402 2366 header_array_index = header_array_index + 1 2403 2367 2404 #if defined( __parallel )2405 2368 CALL rd_mpi_io_create_filetypes_3dsoil( nzb_soil, nzt_soil ) 2406 #endif2407 2369 2408 2370 IF ( include_total_domain_boundaries) THEN … … 2432 2394 ENDIF 2433 2395 CALL sm_io%sm_node_barrier() 2396 2397 IF ( sm_io%is_sm_active() ) THEN 2398 CALL MPI_WIN_FREE( win_3ds, ierr ) 2399 ELSE 2400 DEALLOCATE( array_3d_soil ) 2401 ENDIF 2402 IF ( sm_io%iam_io_pe ) THEN 2403 CALL MPI_TYPE_FREE( ft_3dsoil, ierr ) 2404 ENDIF 2434 2405 #else 2435 2406 CALL posix_lseek( fh, array_position ) 2436 2407 CALL posix_write( fh, array_3d_soil, SIZE( array_3d_soil ) ) 2408 DEALLOCATE( array_3d_soil ) 2437 2409 #endif 2438 2410 ! … … 2589 2561 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, comm2d, ierr ) 2590 2562 ELSE 2591 IF 2563 IF( sm_io%iam_io_pe ) THEN 2592 2564 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2565 ENDIF 2566 IF ( myid == 0 ) THEN 2593 2567 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2594 CALL MPI_FILE_READ _ALL( fh, data, SIZE( data ), MPI_REAL, status, ierr )2568 CALL MPI_FILE_READ( fh, data, SIZE( data ), MPI_REAL, status, ierr ) 2595 2569 ENDIF 2596 IF ( sm_io%is_sm_active() ) THEN 2597 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, sm_io%comm_shared, ierr ) 2598 ENDIF 2570 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, comm2d, ierr ) 2599 2571 ENDIF 2600 2572 #else … … 2749 2721 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 2750 2722 ENDIF 2751 CALL MPI_BCAST( data, SIZE( data ), MPI_ REAL, 0, comm2d, ierr )2723 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, comm2d, ierr ) 2752 2724 ELSE 2753 IF 2725 IF( sm_io%iam_io_pe ) THEN 2754 2726 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2727 ENDIF 2728 IF ( myid == 0 ) THEN 2755 2729 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2756 CALL MPI_FILE_READ _ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr )2730 CALL MPI_FILE_READ( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 2757 2731 ENDIF 2758 IF ( sm_io%is_sm_active() ) THEN 2759 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 2760 ENDIF 2761 ENDIF 2732 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, comm2d, ierr ) 2733 ENDIF 2762 2734 #else 2763 2735 CALL posix_lseek( fh, array_position ) … … 2800 2772 2801 2773 IF ( header_array_index == max_nr_arrays ) THEN 2802 STOP '+++ maximum number of 2d/3d-array entries in restart file 
header exceeded' 2774 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2775 CALL message( 'wrd_mpi_io_global_array_real_1d', 'PA0585', 1, 2, 0, 6, 0 ) 2803 2776 ENDIF 2804 2777 … … 2939 2912 2940 2913 IF ( header_array_index == max_nr_arrays ) THEN 2941 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2914 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2915 CALL message( 'wrd_mpi_io_global_array_int_1d', 'PA0585', 1, 2, 0, 6, 0 ) 2942 2916 ENDIF 2943 2917 … … 3030 3004 ENDDO 3031 3005 3032 write(9,*) 'particle_size_read ',particle_size,array_size,array_position,sum(prt_global_index)3033 3034 3006 ALLOCATE( prt_data(MAX(array_size,1)) ) 3035 3007 … … 3078 3050 array_position = prt_nr_bytes 3079 3051 3080 write(9,*) 'array_position after particle read ',array_position,prt_nr_bytes,rs3081 3082 3052 DEALLOCATE( prt_data ) 3083 3053 … … 3092 3062 ! ------------ 3093 3063 !> Read 1d-REAL surface data array with MPI-IO. 3094 !--------------------------------------------------------------------------------------------------! 3095 SUBROUTINE rrd_mpi_io_surface( name, data, first_index ) 3064 !> This is a recursive subroutine. In case of cyclic fill mode it may call itself for reading parts 3065 !> of the prerun grid. 3066 !--------------------------------------------------------------------------------------------------! 3067 RECURSIVE SUBROUTINE rrd_mpi_io_surface( name, data, first_index ) 3096 3068 3097 3069 IMPLICIT NONE … … 3099 3071 CHARACTER(LEN=*), INTENT(IN) :: name !< 3100 3072 3073 INTEGER(iwp), OPTIONAL :: first_index !< 3074 INTEGER(iwp) :: i !< 3075 INTEGER(iwp) :: j !< 3076 INTEGER(iwp) :: lo_first_index !< 3077 3078 #if defined( __parallel ) 3079 INTEGER(iwp) :: buf_start !< 3101 3080 INTEGER(KIND=rd_offset_kind) :: disp !< displacement of actual indices 3102 INTEGER(KIND=rd_offset_kind) :: disp_f !< displacement in file 3103 INTEGER(KIND=rd_offset_kind) :: disp_n !< displacement of next column 3104 INTEGER(iwp), OPTIONAL :: first_index !< 3105 3106 INTEGER(iwp) :: i !< 3107 INTEGER(iwp) :: i_f !< 3108 INTEGER(iwp) :: j !< 3109 INTEGER(iwp) :: j_f !< 3110 INTEGER(iwp) :: lo_first_index !< 3111 INTEGER(iwp) :: nr_bytes !< 3112 INTEGER(iwp) :: nr_bytes_f !< 3113 INTEGER(iwp) :: nr_words !< 3114 #if defined( __parallel ) 3115 INTEGER, DIMENSION(rd_status_size) :: status !< 3116 #else 3117 TYPE(C_PTR) :: buf !< 3118 #endif 3119 3120 LOGICAL :: found !< 3081 INTEGER(iwp) :: ie !< 3082 INTEGER(iwp) :: ind_gb !< 3083 INTEGER(iwp) :: ind_out !< 3084 INTEGER(iwp) :: is !< 3085 INTEGER(iwp) :: n !< 3086 INTEGER(iwp) :: n_trans !< 3087 3088 INTEGER(iwp),DIMENSION(0:numprocs-1) :: lo_index !< 3089 INTEGER, DIMENSION(rd_status_size) :: status !< 3090 #endif 3091 LOGICAL :: found !< 3121 3092 3122 3093 REAL(wp), INTENT(OUT), DIMENSION(:), TARGET :: data !< 3094 #if defined( __parallel ) 3095 REAL(wp),DIMENSION(:),ALLOCATABLE :: put_buffer !< 3096 #endif 3123 3097 3124 3098 … … 3132 3106 DO i = 1, tgh%nr_arrays 3133 3107 IF ( TRIM( array_names(i) ) == TRIM( name ) ) THEN 3108 ! 3109 !-- ATTENTION: The total_number_of_surface_values and wp MUST be INTERGER(8). 3110 !-- The compiler (at least Intel) first computes total_number_of_surface_values*wp 3111 !-- and then does the conversion to INTEGER(8). 
3112 !-- This may lead to wrong results when total_number_of_surface_values*wp is > 2*10**6 3134 3113 array_position = array_offset(i) + ( lo_first_index - 1 ) * & 3135 total_number_of_surface_values * wp3114 INT( total_number_of_surface_values, idp ) * INT( wp, idp ) 3136 3115 found = .TRUE. 3137 3116 EXIT … … 3139 3118 ENDDO 3140 3119 3141 disp = -1 3142 disp_f = -1 3143 disp_n = -1 3120 ! 3121 !-- In case of 2d-data, name is written only once 3122 IF ( lo_first_index == 1 ) THEN 3123 3124 IF ( header_array_index == max_nr_arrays ) THEN 3125 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 3126 CALL message( 'rrd_mpi_io_surface', 'PA0585', 1, 2, 0, 6, 0 ) 3127 ENDIF 3128 3129 array_names(header_array_index) = name 3130 array_offset(header_array_index) = array_position 3131 header_array_index = header_array_index + 1 3132 3133 ENDIF 3134 3144 3135 IF ( found ) THEN 3145 3146 3136 IF ( cyclic_fill_mode ) THEN 3147 3137 3148 3138 CALL rrd_mpi_io_surface_cyclic_fill 3139 RETURN 3149 3140 3150 3141 ELSE 3151 3152 IF ( MAXVAL( m_global_start ) == -1 ) RETURN ! Nothing to do on this PE 3142 #if defined( __parallel ) 3143 ! 3144 !-- Read data from restart file 3145 CALL sm_io%sm_node_barrier() ! has no effect if I/O on limited number of cores is inactive 3146 IF ( sm_io%iam_io_pe ) THEN 3147 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_surf, 'native', & 3148 MPI_INFO_NULL, ierr ) 3149 CALL MPI_FILE_READ_ALL ( fh, array_out, SIZE(array_out), MPI_REAL, status, ierr ) 3150 ENDIF 3151 CALL sm_io%sm_node_barrier() 3152 3153 ! 3154 !-- Copy data into transfer buffer. Data is organized in a way that only one MPI_PUT to the 3155 !-- respective PE ist required. 3156 ALLOCATE( put_buffer(SUM( transfer_index(4,:) )) ) 3157 3158 ind_gb = 1 3159 DO i = 1, SIZE( local_indices, 2 ) 3160 ind_out = local_indices(1,i) 3161 DO j = 1, local_indices(2,i) 3162 put_buffer(ind_gb) = array_out(ind_out) 3163 ind_out = ind_out + 1 3164 ind_gb = ind_gb + 1 3165 ENDDO 3166 ENDDO 3167 ! 3168 !-- Transfer data from I/O PEs to the respective PEs to which they belong. 3169 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3170 3171 buf_start = 1 3172 DO n = 0, numprocs-1 3173 n_trans = transfer_index(4,n) 3174 IF ( n_trans > 0 ) THEN 3175 disp = transfer_index(3,n) - 1 3176 CALL MPI_PUT( put_buffer(buf_start), n_trans, MPI_REAL, n, disp, n_trans, MPI_REAL,& 3177 win_surf, ierr) 3178 buf_start = buf_start + n_trans 3179 ENDIF 3180 ENDDO 3181 3182 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3183 DEALLOCATE( put_buffer ) 3184 ! 3185 !-- Copy from RMA window into output array (data) to allow transfering data to target PEs. 3186 !-- Check, if the number of surface values per grid cell match the index setup. 3187 lo_index = thread_values 3153 3188 DO i = nxl, nxr 3154 3189 DO j = nys, nyn 3155 3156 IF ( m_global_start(j,i) > 0 ) THEN 3157 disp = array_position+(m_global_start(j,i)-1) * wp 3158 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 3159 nr_bytes = nr_words * wp 3190 is = lo_index(target_thread(j,i)) + 1 3191 ie = is + m_end_index(j,i) - m_start_index(j,i) 3192 data(m_start_index(j,i):m_end_index(j,i)) = array_1d(is:ie) 3193 lo_index(target_thread(j,i)) = lo_index(target_thread(j,i)) + & 3194 e_end_index(j,i) - e_start_index(j,i) + 1 3195 ! 3196 !-- TODO: Test can be removed later. 
3197 IF ( e_end_index(j,i)-e_start_index(j,i)+1 /= NINT( array_1d(is-1) ) ) THEN 3198 WRITE( 9, '(A,6I8)' ) 'Nr surface values does not match ', j, i, & 3199 e_start_index(j,i), e_end_index(j,i), & 3200 e_end_index(j,i)-e_start_index(j,i)+1 , & 3201 NINT( array_1d(is-1) ) 3202 FLUSH( 9 ) 3203 CALL MPI_ABORT( comm2d, 1, ierr ) 3160 3204 ENDIF 3161 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry3162 disp_f = disp3163 nr_bytes_f = 03164 i_f = i3165 j_f = j3166 ENDIF3167 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry3168 disp_n = -13169 IF ( nr_bytes > 0 ) THEN3170 nr_bytes_f = nr_bytes_f+nr_bytes3171 ENDIF3172 ELSEIF ( j == nyn ) THEN ! Next x3173 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN3174 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp3175 ELSE3176 CYCLE3177 ENDIF3178 ELSE3179 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN3180 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp3181 ELSE3182 CYCLE3183 ENDIF3184 ENDIF3185 3186 3187 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block3188 nr_bytes_f = nr_bytes_f + nr_bytes3189 ELSE ! Read3190 #if defined( __parallel )3191 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr )3192 nr_words = nr_bytes_f / wp3193 CALL MPI_FILE_READ( fhs, data(m_start_index(j_f,i_f)), nr_words, MPI_REAL, status, &3194 ierr )3195 #else3196 !3197 !-- Use C_PTR here, because posix read does not work with indexed array3198 buf = C_LOC( data(m_start_index(j_f,i_f)) )3199 CALL posix_lseek( fh, disp_f )3200 CALL posix_read( fh, buf, nr_bytes_f )3201 #endif3202 disp_f = disp3203 nr_bytes_f = nr_bytes3204 i_f = i3205 j_f = j3206 ENDIF3207 3208 3205 ENDDO 3209 3206 ENDDO 3210 ENDIF 3211 3212 3213 ELSE 3214 3215 message_string = 'surface array "' // TRIM( name ) // '" not found in restart file' 3216 CALL message( 'rrd_mpi_io_surface', 'PA0722', 3, 2, 0, 6, 0 ) 3207 3208 3209 #else 3210 CALL posix_lseek( fh, array_position ) 3211 CALL posix_read( fh, array_out, SIZE(array_out) ) 3212 3213 DO i = nxl, nxr 3214 DO j = nys, nyn 3215 data(m_start_index(j,i):m_end_index(j,i)) = & 3216 array_out(e_start_index(j,i)+1:e_end_index(j,i)) 3217 ! 3218 !-- TODO: Test can be removed later. 3219 IF ( e_end_index(j,i)-e_start_index(j,i)+1 /= NINT(array_out(e_start_index(j,i))) )& 3220 THEN 3221 WRITE( 9, '(A,6I8)' ) 'Nr surface values does not match ', j, i, & 3222 e_start_index(j,i), e_end_index(j,i), & 3223 e_end_index(j,i)-e_start_index(j,i)+1, & 3224 NINT( array_out(e_start_index(j,i)) ) 3225 FLUSH( 9 ) 3226 CALL ABORT() 3227 ENDIF 3228 ENDDO 3229 ENDDO 3230 #endif 3231 ENDIF 3217 3232 3218 3233 ENDIF … … 3226 3241 INTEGER(iwp) :: i !< 3227 3242 INTEGER(iwp) :: ie !< 3228 #if defined( __parallel )3229 INTEGER(iwp) :: ierr !<3230 #endif3231 3243 INTEGER(iwp) :: is !< 3232 3244 INTEGER(iwp) :: i_remote !< … … 3241 3253 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_offs !< 3242 3254 #else 3243 INTEGER(idp) :: rem_offs 3244 #endif 3245 3246 LOGICAL :: write_done !< 3247 3248 3249 ! 3250 !-- In the current version, there is only 1 value per grid cell allowed. 3251 !-- In this special case, the cyclical repetition can be done with the same method as for 2d-real 3252 !-- array. 3255 INTEGER(idp) :: rem_offs !< 3256 #endif 3257 3258 REAL(wp), DIMENSION(:), ALLOCATABLE :: c_data !< 3259 3260 3261 ! 3262 !-- ATTENTION: This version allows only 1 surface element per grid cell. 3263 ! 3264 !-- Activate grid of the smaller prerun, i.e. grid variables like nxl, nxr, nys, nyn and other 3265 !-- values are set according to the prerun settings. 
3253 3266 CALL prerun_grid%activate_grid_from_this_class() 3254 3267 3255 3268 IF ( pe_active_for_read ) THEN 3256 rmabuf_2d = -1.0 3269 3270 IF ( MAXVAL( m_end_index ) <= 0 ) THEN 3271 CALL mainrun_grid%activate_grid_from_this_class() 3272 IF ( debug_output ) THEN 3273 CALL debug_message( 'PE inactive for reading restart or prerun data', 'start' ) 3274 ENDIF 3275 RETURN 3276 ENDIF 3277 3278 ALLOCATE( c_data(MAXVAL( m_end_index )) ) 3279 3280 ! 3281 !-- Recursive CALL of rrd_mpi_io_surface. 3282 !-- rrd_mpi_io_surface is called with cyclic_fill_mode = .FALSE. on the smaller prerun grid. 3283 cyclic_fill_mode = .FALSE. 3284 CALL rrd_mpi_io_surface( name, c_data ) 3285 cyclic_fill_mode = .TRUE. 3286 3257 3287 DO i = nxl, nxr 3258 3288 DO j = nys, nyn 3259 3260 IF ( m_global_start(j,i) > 0 ) THEN 3261 disp = array_position+(m_global_start(j,i)-1) * wp 3262 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 3263 nr_bytes = nr_words * wp 3264 ENDIF 3265 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry 3266 disp_f = disp 3267 nr_bytes_f = 0 3268 write_done = .TRUE. 3269 ENDIF 3270 IF( write_done ) THEN 3271 i_f = i 3272 j_f = j 3273 write_done = .FALSE. 3274 ENDIF 3275 3276 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry 3277 disp_n = -1 3278 IF ( nr_bytes > 0 ) THEN 3279 nr_bytes_f = nr_bytes_f+nr_bytes 3280 ENDIF 3281 ELSEIF ( j == nyn ) THEN ! Next x 3282 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN 3283 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp 3284 ELSE 3285 CYCLE 3286 ENDIF 3287 ELSE 3288 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN 3289 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp 3290 ELSE 3291 CYCLE 3292 ENDIF 3293 ENDIF 3294 3295 3296 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block 3297 nr_bytes_f = nr_bytes_f + nr_bytes 3298 ELSE ! Read 3299 #if defined( __parallel ) 3300 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr ) 3301 nr_words = nr_bytes_f / wp 3302 CALL MPI_FILE_READ( fhs, rmabuf_2d(j_f,i_f), nr_words, MPI_REAL, status, ierr ) 3303 #else 3304 CALL posix_lseek( fh, disp_f ) 3305 CALL posix_read( fh, rmabuf_2d(j_f:,i_f:), nr_bytes_f ) 3306 #endif 3307 3308 disp_f = disp 3309 nr_bytes_f = nr_bytes 3310 write_done = .TRUE. 3311 ENDIF 3312 3289 rmabuf_2d(j,i) = c_data(c_start_index(j,i)) 3313 3290 ENDDO 3314 3291 ENDDO 3315 3292 3316 3293 ENDIF 3317 3294 ! 3295 !-- Activate grid of the mainrun, i.e. grid variables like nxl, nxr, nys, nyn and other values 3296 !-- are set according to the mainrun settings. 3318 3297 CALL mainrun_grid%activate_grid_from_this_class() 3319 3298 … … 3324 3303 #endif 3325 3304 3326 IF ( .NOT. pe_active_for_read ) THEN 3327 3328 is = nxl 3329 ie = nxr 3330 js = nys 3331 je = nyn 3332 3333 ELSE 3334 3335 is = nxl 3336 ie = nxr 3337 js = prerun_grid%nys+1 3338 je = nyn 3339 3340 DO i = is, ie 3341 DO j = js, je 3342 i_remote = MOD(i,nx_on_file+1) 3343 j_remote = MOD(j,ny_on_file+1) 3344 rem_pe = remote_pe(i_remote,j_remote) 3345 rem_offs = rma_offset(i_remote,j_remote) 3346 nval = 1 3347 3348 #if defined( __parallel ) 3349 IF ( rem_pe /= myid ) THEN 3350 CALL MPI_GET( data(m_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, & 3351 MPI_REAL, rmawin_2d, ierr) 3352 ELSE 3353 data(m_start_index(j,i)) = rmabuf_2d(j_remote,i_remote) 3354 ENDIF 3355 #else 3356 data(m_start_index(j,i)) = array_2d(i_remote,j_remote) 3357 #endif 3358 ENDDO 3359 ENDDO 3360 is = prerun_grid%nxr+1 3361 ie = nxr 3362 js = nys 3363 je = nyn 3364 3365 ENDIF 3305 ! 
3306 !-- After reading surface data on the small grid, map these data in a cyclic way to all respective 3307 !-- grid points of the main run. 3308 is = nxl 3309 ie = nxr 3310 js = nys 3311 je = nyn 3366 3312 3367 3313 DO i = is, ie 3368 3314 DO j = js, je 3369 i_remote = MOD( i,nx_on_file+1)3370 j_remote = MOD( j,ny_on_file+1)3315 i_remote = MOD( i, nx_on_file+1 ) 3316 j_remote = MOD( j, ny_on_file+1 ) 3371 3317 rem_pe = remote_pe(i_remote,j_remote) 3372 3318 rem_offs = rma_offset(i_remote,j_remote) … … 3375 3321 #if defined( __parallel ) 3376 3322 IF ( rem_pe /= myid ) THEN 3377 CALL MPI_GET( data( m_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, &3323 CALL MPI_GET( data(o_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, & 3378 3324 MPI_REAL, rmawin_2d, ierr) 3379 3325 ELSE 3380 data( m_start_index(j,i)) = rmabuf_2d(j_remote,i_remote)3326 data(o_start_index(j,i)) = rmabuf_2d(j_remote,i_remote) 3381 3327 ENDIF 3382 3328 #else 3383 data( m_start_index(j,i)) = array_2d(i_remote,j_remote)3329 data(o_start_index(j,i)) = array_2d(i_remote,j_remote) 3384 3330 #endif 3385 3331 ENDDO … … 3391 3337 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 3392 3338 #endif 3339 3340 IF ( ALLOCATED( c_data ) ) DEALLOCATE( c_data ) 3393 3341 3394 3342 END SUBROUTINE rrd_mpi_io_surface_cyclic_fill … … 3539 3487 array_position = prt_nr_bytes 3540 3488 3541 write(9,*) 'array_position after particle ',array_position,prt_nr_bytes,rs3542 3543 3489 DEALLOCATE( prt_data ) 3544 3490 … … 3556 3502 IMPLICIT NONE 3557 3503 3558 CHARACTER(LEN=*), INTENT(IN) :: name !< 3559 3560 #if defined( __parallel ) 3561 INTEGER(KIND=rd_offset_kind) :: disp !< 3562 #endif 3563 INTEGER(iwp), OPTIONAL :: first_index !< 3564 #if defined( __parallel ) 3565 INTEGER(iwp) :: i !< 3566 #endif 3567 INTEGER(iwp) :: lo_first_index !< 3568 INTEGER(KIND=rd_offset_kind) :: offset !< 3569 3570 #if defined( __parallel ) 3571 INTEGER, DIMENSION(rd_status_size) :: status !< 3572 #endif 3573 3574 REAL(wp), INTENT(IN), DIMENSION(:), TARGET :: data !< 3575 3576 3577 offset = 0 3504 CHARACTER(LEN=*), INTENT(IN) :: name !< 3505 3506 INTEGER(iwp), OPTIONAL :: first_index !< 3507 INTEGER(iwp) :: i !< 3508 INTEGER(iwp) :: j !< 3509 INTEGER(iwp) :: lo_first_index !< 3510 #if defined( __parallel ) 3511 INTEGER(iwp) :: buf_start !< 3512 INTEGER(iwp) :: ie !< 3513 INTEGER(iwp) :: is !< 3514 INTEGER(iwp) :: ind_gb !< 3515 INTEGER(iwp) :: ind_out !< 3516 INTEGER(iwp) :: n !< 3517 INTEGER(iwp) :: n_trans !< 3518 #endif 3519 3520 #if defined( __parallel ) 3521 INTEGER(KIND=MPI_ADDRESS_KIND) :: disp !< displacement in RMA window 3522 INTEGER(KIND=rd_offset_kind) :: offset !< 3523 3524 INTEGER(iwp), DIMENSION(0:numprocs-1) :: lo_index !< 3525 INTEGER(iwp), DIMENSION(rd_status_size) :: status !< 3526 #endif 3527 3528 REAL(wp), INTENT(IN), DIMENSION(:), TARGET :: data !< 3529 #if defined( __parallel ) 3530 REAL(wp), DIMENSION(:), ALLOCATABLE :: get_buffer !< 3531 #endif 3532 3533 3578 3534 lo_first_index = 1 3579 3535 … … 3581 3537 lo_first_index = first_index 3582 3538 ENDIF 3539 3583 3540 ! 
3584 3541 !-- In case of 2d-data, name is written only once … … 3586 3543 3587 3544 IF ( header_array_index == max_nr_arrays ) THEN 3588 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 3545 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 3546 CALL message( 'wrd_mpi_io_surface', 'PA0585', 1, 2, 0, 6, 0 ) 3589 3547 ENDIF 3590 3548 … … 3596 3554 3597 3555 #if defined( __parallel ) 3598 IF ( sm_io%is_sm_active() ) THEN 3599 DO i = 1, nr_val 3600 array_1d(i+local_start) = data(i) 3556 offset = 0 3557 3558 ALLOCATE( get_buffer(SUM( transfer_index(4,:) )) ) 3559 ! 3560 !-- Copy from input array (data) to RMA window to allow the target PEs to get the appropiate data. 3561 !-- At this point, a dummy surface element is added. This makes sure that every x-y grid cell owns 3562 !-- at least one surface element. This way, bookkeeping becomes much easier. 3563 lo_index = thread_values 3564 DO i = nxl, nxr 3565 DO j = nys, nyn 3566 is = lo_index(target_thread(j,i)) + 1 3567 ie = is + m_end_index(j,i) - m_start_index(j,i) 3568 ! 3569 !-- Store number of surface elements in dummy additional surface element 3570 array_1d(is-1) = e_end_index(j,i) - e_start_index(j,i) + 1 3571 array_1d(is:ie) = data(m_start_index(j,i):m_end_index(j,i)) 3572 lo_index(target_thread(j,i)) = lo_index(target_thread(j,i)) + & 3573 e_end_index(j,i) - e_start_index(j,i) + 1 3601 3574 ENDDO 3602 ELSE 3603 ! array_1d => data !kk Did not work in all cases why??? 3604 ALLOCATE( array_1d( SIZE( data ) ) ) 3605 array_1d = data 3606 ENDIF 3607 3608 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 3575 ENDDO 3576 ! 3577 !-- On target PE, get data from source PEs which are assigned for output on this PE. 3578 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3579 3580 buf_start = 1 3581 DO n = 0, numprocs-1 3582 n_trans = transfer_index(4,n) 3583 IF ( n_trans > 0 ) THEN 3584 disp = transfer_index(3,n) - 1 3585 CALL MPI_GET( get_buffer(buf_start), n_trans, MPI_REAL, n, disp, n_trans, MPI_REAL, & 3586 win_surf, ierr ) 3587 buf_start = buf_start + n_trans 3588 ENDIF 3589 ENDDO 3590 3591 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3592 ! 3593 !-- Copy data to output buffer. Here, the outpuf buffer matches the indices global_start and 3594 !-- global_end. 3595 ind_gb = 1 3596 DO i = 1, SIZE( local_indices, 2 ) 3597 ind_out = local_indices(1,i) 3598 DO j = 1, local_indices(2,i) 3599 array_out(ind_out) = get_buffer(ind_gb) 3600 ind_out = ind_out+1 3601 ind_gb = ind_gb+1 3602 ENDDO 3603 ENDDO 3604 3605 DEALLOCATE( get_buffer ) 3606 3607 ! 3608 !-- Write data to disk. 3609 CALL sm_io%sm_node_barrier() ! 
has no effect if I/O on limited number of cores is inactive 3609 3610 IF ( sm_io%iam_io_pe ) THEN 3610 IF ( all_pes_write ) THEN 3611 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_surf, 'native', MPI_INFO_NULL, & 3612 ierr ) 3613 CALL MPI_FILE_WRITE_ALL( fh, array_1d, nr_iope, MPI_REAL, status, ierr ) 3614 ELSE 3615 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 3616 IF ( nr_val > 0 ) THEN 3617 disp = array_position + 8 * ( glo_start - 1 ) 3618 CALL MPI_FILE_SEEK( fh, disp, MPI_SEEK_SET, ierr ) 3619 CALL MPI_FILE_WRITE( fh, array_1d, nr_iope, MPI_REAL, status, ierr ) 3620 ENDIF 3621 ENDIF 3611 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_surf, 'native', MPI_INFO_NULL, & 3612 ierr ) 3613 CALL MPI_FILE_WRITE_ALL( fh, array_out, SIZE( array_out ), MPI_REAL, status, ierr ) 3622 3614 ENDIF 3623 3615 CALL sm_io%sm_node_barrier() 3624 IF( .NOT. sm_io%is_sm_active() ) DEALLOCATE( array_1d ) 3625 #else 3616 #else 3617 DO i = nxl, nxr 3618 DO j = nys, nyn 3619 array_out(e_start_index(j,i)) = e_end_index(j,i) - e_start_index(j,i) + 1 3620 array_out(e_start_index(j,i)+1:e_end_index(j,i)) = & 3621 data(m_start_index(j,i):m_end_index(j,i)) 3622 ENDDO 3623 ENDDO 3624 3626 3625 CALL posix_lseek( fh, array_position ) 3627 CALL posix_write( fh, data, nr_val ) 3628 #endif 3626 CALL posix_write( fh, array_out, SIZE(array_out) ) 3627 #endif 3628 3629 3629 array_position = array_position + total_number_of_surface_values * wp 3630 3630 3631 ! IF ( lo_first_index == 1 ) THEN3632 ! IF ( debug_level >= 2 .AND. nr_val > 0 ) WRITE(9,*) 'w_surf_1 ', TRIM( name ), ' ', nr_val, SUM( data(1:nr_val) )3633 ! ELSE3634 ! IF ( debug_level >= 2 .AND. nr_val > 0 ) WRITE(9,*) 'w_surf_n ', TRIM( name ), ' ', &3635 ! lo_first_index, nr_val, SUM( data(1:nr_val) )3636 ! ENDIF3637 3638 3631 END SUBROUTINE wrd_mpi_io_surface 3639 3640 3632 3641 3633 … … 3690 3682 IF ( wr_flag .AND. sm_io%iam_io_pe ) THEN 3691 3683 3692 tgh%nr_int = header_int_index - 1 3693 tgh%nr_char = header_char_index - 1 3694 tgh%nr_real = header_real_index - 1 3695 tgh%nr_arrays = header_array_index - 1 3696 tgh%total_nx = iog%nx + 1 3697 tgh%total_ny = iog%ny + 1 3684 tgh%nr_int = header_int_index - 1 3685 tgh%nr_char = header_char_index - 1 3686 tgh%nr_real = header_real_index - 1 3687 tgh%nr_arrays = header_array_index - 1 3688 tgh%total_nx = iog%nx + 1 3689 tgh%total_ny = iog%ny + 1 3690 tgh%pes_along_x = pdims(1) 3691 tgh%pes_along_y = pdims(2) 3698 3692 IF ( include_total_domain_boundaries ) THEN ! Not sure, if LOGICAL interpretation is the same for all compilers, 3699 3693 tgh%i_outer_bound = 1 ! therefore store as INTEGER in general header … … 3802 3796 !-- Close MPI-IO files 3803 3797 #if defined( __parallel ) 3804 !3805 !-- Restart file has been opened with comm2d3806 IF ( fhs /= -1 ) THEN3807 CALL MPI_FILE_CLOSE( fhs, ierr )3808 ENDIF3809 3798 ! 3810 3799 !-- Free RMA windows … … 3816 3805 #endif 3817 3806 3818 IF (.NOT. pe_active_for_read ) RETURN 3807 IF ( ALLOCATED( e_start_index ) ) DEALLOCATE( e_start_index ) 3808 IF ( ALLOCATED( e_end_index ) ) DEALLOCATE( e_end_index ) 3809 IF ( ALLOCATED( m_start_index ) ) DEALLOCATE( m_start_index ) 3810 IF ( ALLOCATED( m_end_index ) ) DEALLOCATE( m_end_index ) 3811 IF ( ALLOCATED( target_thread ) ) DEALLOCATE( target_thread ) 3812 IF ( ALLOCATED( thread_index ) ) DEALLOCATE( thread_index ) 3813 IF ( ALLOCATED( thread_values ) ) DEALLOCATE( thread_values ) 3814 IF ( ALLOCATED( transfer_index ) ) DEALLOCATE( transfer_index ) 3815 3816 IF ( .NOT. 
pe_active_for_read ) RETURN 3819 3817 ! 3820 3818 !-- TODO: better explain the following message … … 3860 3858 !> data is not time critical (data size is comparably small), it will be read by all cores. 3861 3859 !--------------------------------------------------------------------------------------------------! 3862 SUBROUTINE rd_mpi_io_surface_filetypes( start_index, end_index, data_to_write, global_start ) 3860 RECURSIVE SUBROUTINE rd_mpi_io_surface_filetypes( start_index, end_index, data_to_write, & 3861 global_start, global_end ) 3863 3862 3864 3863 IMPLICIT NONE 3865 3864 3866 INTEGER(iwp) :: i !< loop index 3867 INTEGER(iwp) :: j !< loop index 3868 INTEGER(KIND=rd_offset_kind) :: offset !< 3869 3870 INTEGER(iwp), DIMENSION(1) :: dims1 !< 3871 INTEGER(iwp), DIMENSION(1) :: lize1 !< 3872 INTEGER(iwp), DIMENSION(1) :: start1 !< 3873 3874 INTEGER(iwp), DIMENSION(0:numprocs-1) :: all_nr_val !< number of values for all PEs 3875 INTEGER(iwp), DIMENSION(0:numprocs-1) :: lo_nr_val !< local number of values in x and y direction 3876 3877 3878 INTEGER, INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: end_index !< 3879 INTEGER, INTENT(OUT), DIMENSION(nys:nyn,nxl:nxr) :: global_start !< 3880 INTEGER, INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: start_index !< 3881 3882 LOGICAL, INTENT(OUT) :: data_to_write !< returns, if surface data have to be written 3883 3884 ! 3885 !-- Actions during reading 3886 IF ( rd_flag ) THEN 3887 ! 3888 !-- Set start index and end index for the mainrun grid. 3889 !-- ATTENTION: This works only for horizontal surfaces with one vale per grid cell!!! 3890 IF ( cyclic_fill_mode ) THEN 3891 DO i = nxl, nxr 3892 DO j = nys, nyn 3893 start_index (j,i) = (i-nxl) * nny + j - nys + 1 3894 end_index (j,i) = start_index(j,i) 3895 ENDDO 3865 INTEGER(iwp) :: e_lo_start !< 3866 INTEGER(iwp) :: i !< loop index 3867 INTEGER(iwp) :: j !< loop index 3868 INTEGER(iwp) :: index_offset !< 3869 INTEGER(iwp) :: last_end_index !< 3870 INTEGER(iwp) :: lo_start !< 3871 INTEGER(iwp) :: nr_surfcells_pe !< 3872 INTEGER(iwp) :: rest_cells_pe !< 3873 INTEGER(iwp) :: rest_bound !< 3874 #if defined( __parallel ) 3875 INTEGER(iwp) :: io_end_index !< 3876 INTEGER(iwp) :: io_start_index !< 3877 INTEGER(iwp) :: n !< loop index 3878 INTEGER(iwp) :: nr_previous !< 3879 #endif 3880 3881 INTEGER(iwp), DIMENSION(0:numprocs-1,2) :: nr_surfcells_all_s !< 3882 INTEGER(iwp), DIMENSION(0:numprocs-1,2) :: nr_surfcells_all_r !< 3883 #if defined( __parallel ) 3884 INTEGER(iwp), DIMENSION(1) :: dims1 !< global dimension for MPI_TYPE_CREATE_SUBARRAY 3885 INTEGER(iwp), DIMENSION(1) :: lsize1 !< local size for MPI_TYPE_CREATE_SUBARRAY 3886 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_cells_to_thread !< 3887 INTEGER(iwp), DIMENSION(0:pdims(1)) :: nr_surf_cells_x !< 3888 INTEGER(iwp), DIMENSION(0:pdims(1)) :: nr_surf_cells_x_s !< 3889 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_values_to_thread !< 3890 INTEGER(iwp), DIMENSION(1) :: start1 !< start index for MPI_TYPE_CREATE_SUBARRAY 3891 INTEGER(iwp), DIMENSION(nxl:nxr) :: sum_y !< 3892 #endif 3893 3894 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: end_index !< local end indx 3895 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: global_start !< global start index 3896 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: global_end !< global end index 3897 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: start_index !< local start index 3898 #if defined( __parallel ) 3899 INTEGER(iwp), DIMENSION(0:myidy,nxl:nxr) :: nr_previous_y !< 3900 
INTEGER(iwp), DIMENSION(0:pdims(2),nxl:nxr) :: nr_surf_cells_y !< 3901 INTEGER(iwp), DIMENSION(0:pdims(2),nxl:nxr) :: nr_surf_cells_y_s !< 3902 INTEGER(iwp), DIMENSION(4,0:numprocs-1) :: transfer_index_s !< 3903 #endif 3904 3905 LOGICAL, INTENT(OUT) :: data_to_write !< returns .TRUE., if surface data have been written 3906 LOGICAL :: only_dummy_values !< only dummy values, i.e. no data to write 3907 3908 3909 ! 3910 !-- Clean up previous calls. 3911 #if defined( __parallel ) 3912 IF ( win_surf /= -1 ) THEN 3913 CALL MPI_WIN_FREE( win_surf, ierr ) 3914 DEALLOCATE( array_1d ) 3915 win_surf = -1 3916 ENDIF 3917 IF ( ft_surf /= -1 .AND. sm_io%iam_io_pe ) THEN 3918 CALL MPI_TYPE_FREE( ft_surf, ierr ) 3919 ENDIF 3920 ft_surf = -1 3921 IF ( sm_io%is_sm_active() ) THEN 3922 IF ( win_out /= -1 ) THEN 3923 CALL MPI_WIN_FREE( win_out, ierr ) 3924 win_out = -1 3925 ENDIF 3926 ELSE 3927 IF ( ASSOCIATED( array_out ) ) DEALLOCATE( array_out ) 3928 ENDIF 3929 #else 3930 IF ( ASSOCIATED( array_out ) ) DEALLOCATE( array_out ) 3931 #endif 3932 3933 IF ( cyclic_fill_mode ) THEN 3934 CALL cyclic_fill_surface_filetype 3935 RETURN 3936 ELSE 3937 IF ( .NOT. ALLOCATED( e_end_index ) ) ALLOCATE( e_end_index(nys:nyn,nxl:nxr) ) 3938 IF ( .NOT. ALLOCATED( e_start_index ) ) ALLOCATE( e_start_index(nys:nyn,nxl:nxr) ) 3939 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr) ) 3940 IF ( .NOT. ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) ) 3941 IF ( .NOT. ALLOCATED( target_thread ) ) ALLOCATE( target_thread(nys:nyn,nxl:nxr) ) 3942 IF ( .NOT. ALLOCATED( thread_index ) ) ALLOCATE( thread_index(0:numprocs-1) ) 3943 IF ( .NOT. ALLOCATED( thread_values ) ) ALLOCATE( thread_values(0:numprocs-1) ) 3944 IF ( .NOT. ALLOCATED( transfer_index ) ) ALLOCATE( transfer_index(4,0:numprocs-1) ) 3945 ENDIF 3946 3947 IF ( wr_flag) THEN 3948 ! 3949 !-- Add one dummy value at every grid box. 3950 !-- This allows to use MPI_FILE_WRITE_ALL and MPI_FILE_READ_ALL with subarray file type. 3951 index_offset = 0 3952 last_end_index = 0 3953 DO i = nxl, nxr 3954 DO j = nys, nyn 3955 e_start_index(j,i) = start_index (j,i) + index_offset 3956 IF ( end_index (j,i) - start_index(j,i) < 0 ) THEN 3957 e_end_index (j,i) = last_end_index+1 3958 last_end_index = last_end_index+1 3959 ELSE 3960 e_end_index (j,i) = end_index(j,i) + index_offset + 1 3961 last_end_index = e_end_index (j,i) 3962 ENDIF 3963 index_offset = index_offset + 1 3964 ENDDO 3965 ENDDO 3966 #if defined( __parallel ) 3967 ! 3968 !-- Compute indices for global, PE independent 1-d surface element array. 3969 nr_surf_cells_y_s = 0 3970 ! 3971 !-- Count number of surface elements in y-direction for every x. 3972 DO i = nxl, nxr 3973 nr_surf_cells_y_s(myidy,i) = SUM( e_end_index (:,i) - e_start_index (:,i) + 1 ) 3974 ENDDO 3975 ! 3976 !-- Distribute these values to all PEs along y. 3977 CALL MPI_ALLREDUCE( nr_surf_cells_y_s, nr_surf_cells_y, SIZE( nr_surf_cells_y ), & 3978 MPI_INTEGER, MPI_SUM, comm1dy, ierr ) 3979 ! 3980 !-- Sum all surface elements along y for individual x PEs 3981 nr_surf_cells_x_s = 0 3982 nr_surf_cells_x_s(myidx) = SUM( nr_surf_cells_y ) 3983 ! 3984 !-- Distribute to all PEs along x. 3985 CALL MPI_ALLREDUCE( nr_surf_cells_x_s, nr_surf_cells_x, SIZE( nr_surf_cells_x ), & 3986 MPI_INTEGER, MPI_SUM, comm1dx, ierr ) 3987 DO i = nxl, nxr 3988 nr_previous_y(:,i) = 0 3989 DO n = 1, myidy 3990 nr_previous_y(n,i) = nr_previous_y(n-1,i) + nr_surf_cells_y(n-1,i) 3896 3991 ENDDO 3897 ENDIF 3898 3899 IF ( .NOT. 
ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) ) 3900 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr) ) 3901 IF ( .NOT. ALLOCATED( m_global_start ) ) ALLOCATE( m_global_start(nys:nyn,nxl:nxr) ) 3902 ! 3903 !-- Save arrays for later reading 3904 m_start_index = start_index 3905 m_end_index = end_index 3906 m_global_start = global_start 3907 nr_val = MAXVAL( end_index ) 3908 3909 ENDIF 3910 3911 IF ( .NOT. pe_active_for_read ) RETURN 3912 3913 IF ( cyclic_fill_mode ) CALL prerun_grid%activate_grid_from_this_class() 3914 3915 offset = 0 3916 lo_nr_val= 0 3917 lo_nr_val(myid) = MAXVAL( end_index ) 3918 #if defined( __parallel ) 3919 CALL MPI_ALLREDUCE( lo_nr_val, all_nr_val, numprocs, MPI_INTEGER, MPI_SUM, comm2d, ierr ) 3920 IF ( ft_surf /= -1 .AND. sm_io%iam_io_pe ) THEN 3921 CALL MPI_TYPE_FREE( ft_surf, ierr ) ! If set, free last surface filetype 3922 ENDIF 3923 3924 IF ( win_surf /= -1 ) THEN 3925 IF ( sm_io%is_sm_active() ) THEN 3926 CALL MPI_WIN_FREE( win_surf, ierr ) 3927 ENDIF 3928 win_surf = -1 3929 ENDIF 3930 3931 IF ( sm_io%is_sm_active() .AND. rd_flag ) THEN 3932 IF ( fhs == -1 ) THEN 3933 CALL MPI_FILE_OPEN( comm2d, TRIM( io_file_name ), MPI_MODE_RDONLY, MPI_INFO_NULL, fhs, & 3934 ierr ) 3935 ENDIF 3992 ENDDO 3993 3994 sum_y(nxl) = SUM( nr_surf_cells_y(:,nxl) ) 3995 DO i = nxl, nxr 3996 IF ( i > nxl ) THEN 3997 sum_y(i) = sum_y(i-1) + SUM( nr_surf_cells_y(:,i) ) 3998 ENDIF 3999 ENDDO 4000 4001 nr_previous = 0 4002 IF ( myidx >= 1 ) THEN 4003 nr_previous = SUM(nr_surf_cells_x(0:myidx-1)) 4004 ENDIF 4005 4006 global_start(nys,nxl) = 1 + nr_previous + nr_previous_y(myidy,nxl) 4007 DO j = nys+1, nyn 4008 global_start(j,nxl) = global_start(j-1,nxl) + e_end_index(j-1,nxl) - & 4009 e_start_index(j-1,nxl) + 1 4010 ENDDO 4011 4012 DO i = nxl+1, nxr 4013 global_start(nys,i) = 1 + nr_previous + nr_previous_y(myidy,i) + sum_y(i-1) 4014 DO j = nys+1, nyn 4015 global_start(j,i) = global_start(j-1,i) + e_end_index(j-1,i) - e_start_index(j-1,i) + 1 4016 ENDDO 4017 ENDDO 4018 #else 4019 global_start = e_start_index 4020 #endif 4021 DO i = nxl, nxr 4022 DO j = nys, nyn 4023 global_end(j,i) = global_start(j,i) + e_end_index (j,i) - e_start_index (j,i) 4024 ENDDO 4025 ENDDO 4026 3936 4027 ELSE 3937 fhs = fh 3938 ENDIF 3939 #else 3940 all_nr_val(myid) = lo_nr_val(myid) 3941 #endif 3942 nr_val = lo_nr_val(myid) 4028 ! 4029 !-- In case of read, compute e_start_index and e_end_index for current processor grid. 4030 !-- This data contains one extra value for every i and j. 4031 e_lo_start = 1 4032 lo_start = 1 4033 DO i = nxl, nxr 4034 DO j = nys, nyn 4035 e_start_index(j,i) = e_lo_start 4036 e_end_index(j,i) = e_lo_start + global_end(j,i) - global_start(j,i) 4037 e_lo_start = e_lo_start + global_end(j,i) - global_start(j,i) + 1 4038 start_index(j,i) = lo_start 4039 end_index(j,i) = lo_start + global_end(j,i) - global_start(j,i) - 1 4040 lo_start = lo_start + global_end(j,i) - global_start(j,i) 4041 ENDDO 4042 ENDDO 4043 4044 ENDIF 4045 4046 nr_surfcells_all_s = 0 4047 nr_surfcells_all_s(myid,1) = MAXVAL( e_end_index ) ! 
don't split surface elements of one gridbox 4048 nr_surfcells_all_s(myid,2) = MAXVAL( e_end_index - e_start_index ) 4049 4050 #if defined( __parallel ) 4051 CALL MPI_ALLREDUCE( nr_surfcells_all_s, nr_surfcells_all_r, SIZE( nr_surfcells_all_s ), & 4052 MPI_INTEGER, MPI_SUM, comm2d, ierr ) 4053 #else 4054 nr_surfcells_all_r = nr_surfcells_all_s 4055 #endif 3943 4056 3944 4057 total_number_of_surface_values = 0 … … 3947 4060 glo_start = total_number_of_surface_values + 1 3948 4061 ENDIF 3949 total_number_of_surface_values = total_number_of_surface_values + all_nr_val(i)4062 total_number_of_surface_values = total_number_of_surface_values + nr_surfcells_all_r(i,1) 3950 4063 ENDDO 3951 3952 ! 3953 !-- Actions during reading 3954 IF ( rd_flag ) THEN 3955 3956 #if defined( __parallel ) 3957 CALL MPI_FILE_SET_VIEW( fhs, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 3958 #endif 3959 ENDIF 3960 3961 IF ( cyclic_fill_mode ) CALL mainrun_grid%activate_grid_from_this_class() 3962 3963 ! 3964 !-- Actions during writing 3965 IF ( wr_flag ) THEN 3966 ! 3967 !-- Create surface filetype 3968 ft_surf = -1 3969 global_start = start_index + glo_start - 1 3970 3971 WHERE ( end_index < start_index ) 3972 global_start = -1 3973 ENDWHERE 3974 3975 #if defined( __parallel ) 3976 IF ( sm_io%is_sm_active() ) THEN 3977 IF ( sm_io%iam_io_pe ) THEN 3978 ! 3979 !-- Calculate number of values of all PEs of an I/O group 3980 nr_iope = 0 3981 DO i = myid, myid+sm_io%sh_npes-1 3982 nr_iope = nr_iope + all_nr_val(i) 4064 only_dummy_values = ( MAXVAL( nr_surfcells_all_r(:,2) ) <= 0 ) 4065 4066 ! 4067 !-- Compute indices of equally distributed surface elements. 4068 !-- Number of surface values scheduled for ouput on this PE: 4069 nr_surfcells_pe = total_number_of_surface_values / numprocs 4070 rest_cells_pe = MOD( total_number_of_surface_values, numprocs ) 4071 rest_bound = rest_cells_pe * ( nr_surfcells_pe + 1 ) 4072 m_start_index = start_index 4073 m_end_index = end_index 4074 4075 ! 4076 !-- Compute number of elements on source PE, which have to be send to the corresponding target PE. 4077 #if defined( __parallel ) 4078 nr_cells_to_thread = 0 4079 nr_values_to_thread = 0 4080 DO i = nxl, nxr 4081 DO j = nys, nyn 4082 IF ( rest_cells_pe == 0 ) THEN 4083 target_thread(j,i) = ( global_start(j,i) - 1 ) / nr_surfcells_pe 4084 ELSE 4085 IF ( global_start(j,i) <= rest_bound ) THEN 4086 target_thread(j,i) = ( global_start(j,i) - 1 ) / ( nr_surfcells_pe + 1 ) 4087 ELSE 4088 target_thread(j,i) = ( global_start(j,i) - rest_bound - 1 ) / nr_surfcells_pe 4089 target_thread(j,i) = target_thread(j,i) + rest_cells_pe 4090 ENDIF 4091 ! 4092 !-- TODO: Test output, to be removed later. 4093 IF ( target_thread(j,i) >= numprocs ) THEN 4094 WRITE( 9,'(A,8I8)' ) 'target_thread ', j, i, target_thread(j,i), & 4095 global_start(j,i) , nr_surfcells_pe 4096 FLUSH( 9 ) 4097 CALL MPI_ABORT( comm2d, 1, ierr ) 4098 ENDIF 4099 ENDIF 4100 nr_cells_to_thread(target_thread(j,i)) = nr_cells_to_thread(target_thread(j,i)) + 1 4101 nr_values_to_thread(target_thread(j,i)) = nr_values_to_thread(target_thread(j,i)) + & 4102 e_end_index(j,i) - e_start_index(j,i) + 1 4103 ENDDO 4104 ENDDO 4105 4106 ! 4107 !-- Compute start index in the transfer buffer on the source side for the corresponding target PE. 4108 thread_index(0) = 1 4109 thread_values(0) = 1 4110 DO n = 1, numprocs-1 4111 thread_index(n) = thread_index(n-1) + nr_cells_to_thread(n-1) 4112 thread_values(n) = thread_values(n-1) + nr_values_to_thread(n-1) 4113 ENDDO 4114 ! 
4115 !-- Buffer distribution on the source side. 4116 DO n = 0, numprocs-1 4117 transfer_index_s(1,n) = thread_index(n) 4118 transfer_index_s(2,n) = nr_cells_to_thread(n) 4119 transfer_index_s(3,n) = thread_values(n) 4120 transfer_index_s(4,n) = nr_values_to_thread(n) 4121 ENDDO 4122 4123 CALL MPI_ALLTOALL( transfer_index_s, 4, MPI_INTEGER, transfer_index, 4, MPI_INTEGER, comm2d, & 4124 ierr) 4125 ! 4126 !-- Buffer distribution on the target side side. 4127 CALL get_remote_indices() 4128 ! 4129 !-- Create surface element file type. 4130 IF ( total_number_of_surface_values > 0 .AND. .NOT. only_dummy_values) THEN 4131 data_to_write = .TRUE. 4132 ELSE 4133 data_to_write = .FALSE. 4134 ENDIF 4135 4136 CALL MPI_ALLREDUCE( global_end(nyn,nxr), dims1(1), 1, MPI_INTEGER, MPI_MAX, comm2d, ierr ) 4137 start1(1) = MINVAL( local_indices(1,:) ) - 1 4138 IF ( sm_io%is_sm_active() ) THEN 4139 CALL MPI_ALLREDUCE( SUM( local_indices(2,:) ), lsize1(1), 1, MPI_INTEGER, MPI_SUM, & 4140 sm_io%comm_shared, ierr ) 4141 ELSE 4142 lsize1(1) = SUM( local_indices(2,:) ) 4143 ENDIF 4144 4145 IF ( sm_io%iam_io_pe ) THEN 4146 IF ( total_number_of_surface_values > 0 ) THEN 4147 CALL MPI_TYPE_CREATE_SUBARRAY( 1, dims1, lsize1, start1, MPI_ORDER_FORTRAN, MPI_REAL, & 4148 ft_surf, ierr ) 4149 CALL MPI_TYPE_COMMIT( ft_surf, ierr ) 4150 ENDIF 4151 ENDIF 4152 ! 4153 !-- Allocate rma window to supply surface data to other PEs. 4154 CALL rd_alloc_rma_mem( array_1d, SUM( nr_values_to_thread ), win_surf ) 4155 ! 4156 !-- Allocate shared array on IO-PE to supply data for MPI-IO (write or read). 4157 IF ( sm_io%is_sm_active() ) THEN 4158 IF ( sm_io%iam_io_pe ) THEN 4159 io_start_index = start1(1) + 1 4160 io_end_index = start1(1) + lsize1(1) 4161 ENDIF 4162 CALL MPI_BCAST( io_start_index, 1, MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 4163 CALL MPI_BCAST( io_end_index, 1, MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 4164 CALL sm_io%sm_allocate_shared( array_out, io_start_index, io_end_index, win_out ) 4165 ELSE 4166 ALLOCATE( array_out(start1(1)+1:start1(1)+lsize1(1)) ) 4167 ENDIF 4168 #else 4169 IF ( total_number_of_surface_values > 0 .AND. .NOT. only_dummy_values ) THEN 4170 data_to_write = .TRUE. 4171 ELSE 4172 data_to_write = .FALSE. 4173 ENDIF 4174 ALLOCATE( array_out(1:total_number_of_surface_values) ) 4175 #endif 4176 4177 CONTAINS 4178 4179 SUBROUTINE cyclic_fill_surface_filetype 4180 4181 INTEGER(iwp) :: i !< loop index 4182 INTEGER(iwp) :: j !< loop index 4183 4184 4185 IF ( .NOT. ALLOCATED( o_start_index ) ) ALLOCATE( o_start_index(nys:nyn,nxl:nxr) ) 4186 IF ( .NOT. ALLOCATED( o_end_index ) ) ALLOCATE( o_end_index(nys:nyn,nxl:nxr) ) 4187 4188 lo_start = 1 4189 DO i = nxl, nxr 4190 DO j = nys, nyn 4191 o_start_index(j,i) = lo_start 4192 o_end_index(j,i) = lo_start 4193 lo_start = lo_start + 1 4194 ENDDO 4195 ENDDO 4196 start_index = o_start_index 4197 end_index = o_end_index 4198 4199 IF ( MAXVAL( global_end-global_start ) > 1 ) THEN 4200 message_string = 'cylic-fill method does not allow more than one surface element ' // & 4201 'per grid box' 4202 CALL message( 'cyclic_fill_surface_filetype', 'PA0742', 1, 2, 0, 6, 0 ) 4203 ENDIF 4204 ! 4205 !-- Activate grid of the smaller prerun, i.e. grid variables like nxl, nxr, nys, nyn and others 4206 !-- are set according to the prerun layout. 4207 CALL prerun_grid%activate_grid_from_this_class() 4208 4209 IF ( pe_active_for_read ) THEN 4210 4211 IF ( .NOT. ALLOCATED( c_global_start ) ) ALLOCATE( c_global_start(nys:nyn,nxl:nxr) ) 4212 IF ( .NOT. 
ALLOCATED( c_global_end ) ) ALLOCATE( c_global_end(nys:nyn,nxl:nxr) ) 4213 IF ( .NOT. ALLOCATED( c_start_index ) ) ALLOCATE( c_start_index(nys:nyn,nxl:nxr) ) 4214 IF ( .NOT. ALLOCATED( c_end_index ) ) ALLOCATE( c_end_index(nys:nyn,nxl:nxr) ) 4215 4216 DO i = nxl, nxr 4217 DO j = nys, nyn 4218 c_global_start(j,i) = global_start(j,i) 4219 c_global_end(j,i) = global_end(j,i) 3983 4220 ENDDO 3984 ELSE 3985 local_start = 0 3986 DO i = myid-sm_io%sh_rank, myid-1 3987 local_start = local_start + all_nr_val(i) 3988 ENDDO 4221 ENDDO 4222 ! 4223 !-- Recursive call of rd_mpi_io_surface_filetypes. 4224 !-- Prerun data are read, but they are treated as if they are mainrun data, just on a smaller 4225 !-- grid. 4226 cyclic_fill_mode = .FALSE. 4227 CALL rd_mpi_io_surface_filetypes( c_start_index, c_end_index, data_to_write, & 4228 c_global_start, c_global_end ) 4229 cyclic_fill_mode = .TRUE. 4230 4231 ENDIF 4232 ! 4233 !-- Activate grid of the mainrun, i.e. grid variables like nxl, nxr, nys, nyn and others 4234 !-- are set according to the mainrun layout. 4235 CALL mainrun_grid%activate_grid_from_this_class() 4236 4237 #if defined( __parallel ) 4238 CALL MPI_BCAST( data_to_write, 1, MPI_LOGICAL, 0, comm2d, ierr ) 4239 #endif 4240 4241 END SUBROUTINE cyclic_fill_surface_filetype 4242 4243 #if defined( __parallel ) 4244 ! 4245 !-- Get the indices of the surface elements inside the RMA window on the remote PE. 4246 !-- This information is required to fetch the surface element data on remote PEs 4247 !-- in rrd_mpi_io_surface and wrd_mpi_io_surface. 4248 SUBROUTINE get_remote_indices 4249 4250 INTEGER(iwp) :: buf_start !< 4251 INTEGER(iwp) :: i !< 4252 INTEGER(iwp) :: j !< 4253 INTEGER(iwp) :: n !< 4254 INTEGER(iwp) :: n_trans !< 4255 INTEGER(iwp) :: win_ind !< 4256 4257 INTEGER(KIND=MPI_ADDRESS_KIND) :: disp !< displacement in RMA window 4258 INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize !< size of RMA window 4259 4260 INTEGER(iwp), DIMENSION(0:numprocs-1) :: lo_index !< 4261 4262 INTEGER(iwp), POINTER, DIMENSION(:,:) :: surf_val_index !< 4263 4264 4265 IF ( ALLOCATED( local_indices ) ) DEALLOCATE( local_indices ) 4266 ALLOCATE( local_indices(2,MAX( SUM( transfer_index(2,:) ), 2 ))) 4267 4268 local_indices(1,:) = 0 4269 local_indices(2,:) = 0 4270 4271 winsize = MAX( 2 * SUM( nr_cells_to_thread ), 2 ) 4272 4273 ALLOCATE( surf_val_index(2,winsize) ) 4274 winsize = winsize * iwp 4275 CALL MPI_WIN_CREATE( surf_val_index, winsize, iwp, MPI_INFO_NULL, comm2d, win_ind, ierr ) 4276 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4277 4278 lo_index = thread_index 4279 DO i = nxl, nxr 4280 DO j = nys, nyn 4281 surf_val_index(1,lo_index(target_thread(j,i))) = global_start(j,i) 4282 surf_val_index(2,lo_index(target_thread(j,i))) = global_end(j,i) - global_start(j,i) & 4283 + 1 4284 lo_index(target_thread(j,i)) = lo_index(target_thread(j,i)) + 1 4285 ENDDO 4286 ENDDO 4287 4288 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4289 4290 buf_start = 1 4291 DO n = 0, numprocs-1 4292 n_trans = transfer_index(2,n) 4293 IF ( n_trans > 0 ) THEN 4294 disp = 2 * ( transfer_index(1,n) - 1 ) 4295 CALL MPI_GET( local_indices(1,buf_start), 2*n_trans, MPI_INTEGER, n, disp, 2*n_trans, & 4296 MPI_INTEGER, win_ind, ierr ) 4297 buf_start = buf_start + n_trans 3989 4298 ENDIF 3990 ! 
3991 !-- Get the size of shared memory window on all PEs 3992 CALL MPI_BCAST( nr_iope, 1, MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 3993 CALL sm_io%sm_allocate_shared( array_1d, 1, MAX( 1, nr_iope ), win_surf ) 3994 ELSE 3995 nr_iope = nr_val 3996 ENDIF 3997 #else 3998 nr_iope = nr_val 3999 #endif 4000 4001 ! 4002 !-- Check, if surface data exist on this PE 4003 data_to_write = .TRUE. 4004 IF ( total_number_of_surface_values == 0 ) THEN 4005 data_to_write = .FALSE. 4006 RETURN 4007 ENDIF 4008 4009 IF ( sm_io%iam_io_pe ) THEN 4010 4011 all_pes_write = ( MINVAL( all_nr_val ) > 0 ) 4012 4013 IF ( all_pes_write ) THEN 4014 dims1(1) = total_number_of_surface_values 4015 lize1(1) = nr_iope 4016 start1(1) = glo_start-1 4017 4018 #if defined( __parallel ) 4019 IF ( total_number_of_surface_values > 0 ) THEN 4020 CALL MPI_TYPE_CREATE_SUBARRAY( 1, dims1, lize1, start1, MPI_ORDER_FORTRAN, & 4021 MPI_REAL, ft_surf, ierr ) 4022 CALL MPI_TYPE_COMMIT( ft_surf, ierr ) 4023 ENDIF 4024 #endif 4299 ENDDO 4300 4301 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4302 4303 buf_start = 1 4304 DO n = 0, numprocs-1 4305 n_trans = transfer_index(2,n) 4306 IF ( n_trans > 0 ) THEN 4307 disp = transfer_index(1,n) - 1 4308 buf_start = buf_start + n_trans 4025 4309 ENDIF 4026 ENDIF 4027 4028 ENDIF 4310 ENDDO 4311 4312 CALL MPI_WIN_FREE( win_ind, ierr ) 4313 4314 DEALLOCATE( surf_val_index ) 4315 4316 END SUBROUTINE get_remote_indices 4317 4318 !--------------------------------------------------------------------------------------------------! 4319 ! Description: 4320 ! ------------ 4321 !> Allocate memory and create window for one-sided communication (1-d INTEGER array) 4322 !--------------------------------------------------------------------------------------------------! 4323 SUBROUTINE rd_alloc_rma_mem( array, idim, win ) 4324 4325 IMPLICIT NONE 4326 4327 INTEGER(iwp), INTENT(IN) :: idim !< Dimension of this 1-D array 4328 INTEGER :: ierr !< MPI error code 4329 INTEGER(iwp), INTENT(OUT) :: win !< MPI window 4330 INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize !< size of RMA window 4331 4332 REAL(wp), DIMENSION(:), POINTER, INTENT(INOUT) :: array !< array to access RMA window locally 4333 4334 4335 winsize = MAX( idim, 2 ) 4336 ALLOCATE( array(winsize) ) 4337 winsize = winsize * wp 4338 CALL MPI_WIN_CREATE( array, winsize, wp, MPI_INFO_NULL, comm2d, win, ierr ) 4339 array = -1 4340 CALL MPI_WIN_FENCE( 0, win, ierr ) 4341 4342 END SUBROUTINE rd_alloc_rma_mem 4343 #endif 4029 4344 4030 4345 END SUBROUTINE rd_mpi_io_surface_filetypes 4031 4032 4346 4033 4347 … … 4079 4393 iog%nnx = iog%nnx + nbgp 4080 4394 ENDIF 4081 IF ( myidx == npex-1 .OR. npex == -1 ) THEN ! npex == 1 if -D__parallel not set4395 IF ( myidx == pdims(1)-1 ) THEN 4082 4396 iog%nxr = iog%nxr + nbgp 4083 4397 iog%nnx = iog%nnx + nbgp … … 4087 4401 iog%nny = iog%nny + nbgp 4088 4402 ENDIF 4089 IF ( myidy == npey-1 .OR. npey == -1 ) THEN ! npey == 1 if -D__parallel not set4403 IF ( myidy == pdims(2)-1 ) THEN 4090 4404 iog%nyn = iog%nyn + nbgp 4091 4405 iog%nny = iog%nny + nbgp … … 4251 4565 iog%nnx = iog%nnx + nbgp 4252 4566 ENDIF 4253 IF ( myidx == npex-1 .OR. npex == -1 ) THEN ! npex == 1 if -D__parallel not set4567 IF ( myidx == pdims(1)-1 ) THEN 4254 4568 iog%nxr = iog%nxr + nbgp 4255 4569 iog%nnx = iog%nnx + nbgp … … 4259 4573 iog%nny = iog%nny + nbgp 4260 4574 ENDIF 4261 IF ( myidy == npey-1 .OR. npey == -1 ) THEN ! 
npey == 1 if -D__parallel not set4575 IF ( myidy == pdims(2)-1 ) THEN 4262 4576 iog%nyn = iog%nyn + nbgp 4263 4577 iog%nny = iog%nny + nbgp … … 4326 4640 !> to a single file that contains the global arrays. It is not required for the serial mode. 4327 4641 !--------------------------------------------------------------------------------------------------! 4328 #if defined( __parallel )4329 4642 SUBROUTINE rd_mpi_io_create_filetypes_3dsoil( nzb_soil, nzt_soil ) 4330 4643 … … 4334 4647 INTEGER, INTENT(IN) :: nzt_soil !< 4335 4648 4649 #if defined( __parallel ) 4336 4650 INTEGER, DIMENSION(3) :: dims3 !< 4337 4651 INTEGER, DIMENSION(3) :: lize3 !< … … 4367 4681 CALL MPI_TYPE_COMMIT( ft_3dsoil, ierr ) 4368 4682 ENDIF 4683 #else 4684 ALLOCATE( array_3d_soil(nzb_soil:nzt_soil,iog%nxl:iog%nxr,iog%nys:iog%nyn) ) 4685 sm_io%io_grid = iog 4686 #endif 4369 4687 4370 4688 END SUBROUTINE rd_mpi_io_create_filetypes_3dsoil 4371 #endif4372 4373 4374 4689 4375 4690 !--------------------------------------------------------------------------------------------------! … … 4381 4696 4382 4697 IMPLICIT NONE 4383 4384 4698 4385 4699 #if defined( __parallel ) … … 4401 4715 4402 4716 ENDIF 4717 4403 4718 ! 4404 4719 !-- Free last surface filetype … … 4415 4730 IF ( sm_io%iam_io_pe .AND. ft_3di4 /= -1 ) THEN 4416 4731 CALL MPI_TYPE_FREE( ft_3di4, ierr ) 4732 ft_3di4 = -1 4733 ENDIF 4734 IF ( sm_io%iam_io_pe .AND. ft_3di8 /= -1 ) THEN 4417 4735 CALL MPI_TYPE_FREE( ft_3di8, ierr ) 4736 ft_3di8 = -1 4418 4737 ENDIF 4419 4738 4420 4739 IF ( sm_io%is_sm_active() .AND. win_3di4 /= -1 ) THEN 4421 4740 CALL sm_io%sm_free_shared( win_3di4 ) 4741 win_3di4 = -1 4742 ENDIF 4743 IF ( sm_io%is_sm_active() .AND. win_3di8 /= -1 ) THEN 4422 4744 CALL sm_io%sm_free_shared( win_3di8 ) 4745 win_3di8 = -1 4746 ENDIF 4747 4748 IF ( win_start /= -1 ) THEN 4749 CALL sm_io%sm_free_shared( win_start) 4750 CALL sm_io%sm_free_shared( win_end) 4751 CALL sm_io%sm_free_shared( win_glost) 4752 win_start = -1 4753 win_end = -1 4754 win_glost = -1 4423 4755 ENDIF 4424 4756 … … 4426 4758 win_surf = -1 4427 4759 #else 4428 IF ( ASSOCIATED( array_2d) ) DEALLOCATE( array_2d )4429 IF ( ASSOCIATED( array_2di) ) DEALLOCATE( array_2di )4430 IF ( ASSOCIATED( array_3d) ) DEALLOCATE( array_3d )4431 IF ( ASSOCIATED( array_3di4) ) DEALLOCATE( array_3di4 )4432 IF ( ASSOCIATED( array_3di8) ) DEALLOCATE( array_3di8 )4760 IF ( ASSOCIATED( array_2d ) ) DEALLOCATE( array_2d ) 4761 IF ( ASSOCIATED( array_2di ) ) DEALLOCATE( array_2di ) 4762 IF ( ASSOCIATED( array_3d ) ) DEALLOCATE( array_3d ) 4763 IF ( ASSOCIATED( array_3di4 ) ) DEALLOCATE( array_3di4 ) 4764 IF ( ASSOCIATED( array_3di8 ) ) DEALLOCATE( array_3di8 ) 4433 4765 #endif 4434 4766 -
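The load balancing introduced above (nr_surfcells_pe, rest_cells_pe, rest_bound, target_thread) assigns each grid box as a whole to one target PE so that every PE ends up with roughly the same number of surface values for I/O. The arithmetic can be exercised in isolation with the following standalone sketch; it reuses the variable names of the changeset, but the totals are made up for illustration and the program is not part of PALM (in the changeset the mapping is applied to global_start(j,i), i.e. to the first element of a grid box, so a box is never split):

   PROGRAM even_distribution_sketch
   !
   !-- Minimal sketch of the index arithmetic used in rd_mpi_io_surface_filetypes to map a
   !-- 1-based global surface-element index onto an equally loaded target PE.
      IMPLICIT NONE

      INTEGER, PARAMETER ::  numprocs = 4                          !< assumed number of PEs
      INTEGER, PARAMETER ::  total_number_of_surface_values = 10   !< assumed global element count

      INTEGER ::  global_index     !< 1-based global index of a surface element
      INTEGER ::  nr_surfcells_pe  !< minimum number of elements per PE
      INTEGER ::  rest_cells_pe    !< number of PEs that carry one extra element
      INTEGER ::  rest_bound       !< last global index handled by the "+1" PEs
      INTEGER ::  target_thread    !< PE that will hold this element for I/O

      nr_surfcells_pe = total_number_of_surface_values / numprocs
      rest_cells_pe   = MOD( total_number_of_surface_values, numprocs )
      rest_bound      = rest_cells_pe * ( nr_surfcells_pe + 1 )

      DO  global_index = 1, total_number_of_surface_values
         IF ( rest_cells_pe == 0 )  THEN
            target_thread = ( global_index - 1 ) / nr_surfcells_pe
         ELSEIF ( global_index <= rest_bound )  THEN
            target_thread = ( global_index - 1 ) / ( nr_surfcells_pe + 1 )
         ELSE
            target_thread = rest_cells_pe + ( global_index - rest_bound - 1 ) / nr_surfcells_pe
         ENDIF
         PRINT '(A,I4,A,I3)', 'global index ', global_index, '  -> target PE ', target_thread
      ENDDO

   END PROGRAM even_distribution_sketch

With 10 values on 4 PEs the sketch yields a 3/3/2/2 split, i.e. the remainder PEs carry one element more, which is exactly the case distinguished via rest_bound above.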
palm/trunk/SOURCE/shared_memory_io_mod.f90
r4828 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance 28 ! 29 ! 4828 2021-01-05 11:21:41Z Giersch 27 30 ! additions for output of particle time series 28 31 ! … … 39 42 ! unused variable removed 40 43 ! 41 !42 44 ! Additions for cyclic fill mode 43 !44 45 ! 45 46 ! File re-formatted to follow the PALM coding standard … … 47 48 ! 48 49 ! Initial version (Klaus Ketelsen) 49 !50 !51 50 ! 52 51 ! Description: … … 100 99 comm1dy, & 101 100 comm2d, & 101 comm_palm, & 102 102 ierr, & 103 103 myid, & … … 120 120 #endif 121 121 122 USE transpose_indices, & 123 ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nys_x, nys_z, nyn_x, nyn_z, nzb_x, nzb_y, nzt_x, nzt_y 124 122 125 IMPLICIT NONE 123 126 … … 149 152 END TYPE domain_decomposition_grid_features 150 153 154 TYPE, PUBLIC :: sm_remote_array 155 156 TYPE(C_PTR) :: rem_ptr !< 157 INTEGER(iwp) :: d1e !< 158 INTEGER(iwp) :: d1s !< 159 INTEGER(iwp) :: d2e !< 160 INTEGER(iwp) :: d2s !< 161 INTEGER(iwp) :: d3e !< 162 INTEGER(iwp) :: d3s !< 163 INTEGER(iwp) :: d4e !< 164 INTEGER(iwp) :: d4s !< 165 166 END TYPE sm_remote_array 167 151 168 ! 152 169 !-- Class definition for shared memory instances. … … 164 181 INTEGER(iwp), PUBLIC :: sh_rank !< 165 182 166 LOGICAL, PUBLIC :: iam_io_pe = .TRUE. !< This PE is an IO-PE167 183 ! 168 184 !-- Variables for the I/O virtual grid 169 INTEGER(iwp), PUBLIC :: comm_io !< Communicator for all IO processes185 INTEGER(iwp), PUBLIC :: comm_io !< communicator for all IO processes 170 186 INTEGER(iwp), PUBLIC :: io_npes !< 171 187 INTEGER(iwp), PUBLIC :: io_rank !< 172 188 ! 173 189 !-- Variables for the node local communicator 174 INTEGER(iwp) :: comm_node !< Communicator for all processes of current node190 INTEGER(iwp) :: comm_node !< communicator for all processes of current node 175 191 INTEGER(iwp) :: io_pe_global_rank !< 176 192 INTEGER(iwp) :: n_npes !< 177 193 INTEGER(iwp) :: n_rank !< 178 194 179 TYPE(domain_decomposition_grid_features), PUBLIC :: io_grid !< io grid features, depending on reading from prerun or restart run 180 195 LOGICAL, PUBLIC :: is_root_pe !< 196 LOGICAL, PUBLIC :: iam_io_pe = .TRUE. 
!< this PE is an IO-PE 197 198 TYPE(domain_decomposition_grid_features), PUBLIC :: io_grid !< io grid features, depending on reading from prerun or main run 181 199 182 200 CONTAINS … … 191 209 PROCEDURE, PASS(this), PUBLIC :: sm_node_barrier 192 210 #if defined( __parallel ) 211 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_32 193 212 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_64 194 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_32195 213 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1di 214 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_32 196 215 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_64 197 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_32198 216 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2di 217 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_32 199 218 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_64 200 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_32 219 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_4d_32 220 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_4d_64 201 221 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3di_32 202 222 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3di_64 223 PROCEDURE, PASS(this), PUBLIC :: sm_all_allocate_shared_3d_64 203 224 204 225 GENERIC, PUBLIC :: sm_allocate_shared => & 205 sm_allocate_shared_1d_64, sm_allocate_shared_1d_32, & 206 sm_allocate_shared_2d_64, sm_allocate_shared_2d_32, & 207 sm_allocate_shared_2di, sm_allocate_shared_3d_64, & 208 sm_allocate_shared_3d_32, sm_allocate_shared_1di, & 209 sm_allocate_shared_3di_32, sm_allocate_shared_3di_64 226 sm_allocate_shared_1d_64, sm_allocate_shared_1d_32, & 227 sm_allocate_shared_2d_64, sm_allocate_shared_2d_32, & 228 sm_allocate_shared_2di, sm_allocate_shared_3d_64, & 229 sm_allocate_shared_4d_64, sm_allocate_shared_4d_32, & 230 sm_allocate_shared_3d_32, sm_allocate_shared_1di, & 231 sm_allocate_shared_3di_32, sm_allocate_shared_3di_64 232 233 GENERIC, PUBLIC :: sm_all_allocate_shared => sm_all_allocate_shared_3d_64 210 234 #endif 211 235 END TYPE sm_class … … 226 250 227 251 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 228 INTEGER , INTENT(IN), OPTIONAL:: comm_input !< main model communicator (comm2d) can optional be set252 INTEGER(iwp), INTENT(IN), OPTIONAL :: comm_input !< main model communicator (comm2d) can optional be set 229 253 230 254 #if defined( __parallel ) 231 INTEGER :: color 232 INTEGER :: max_n _npes!< maximum number of PEs/node255 INTEGER :: color !< 256 INTEGER :: max_npes_per_node !< maximum number of PEs/node 233 257 #endif 234 258 … … 237 261 this%nr_io_pe_per_node = 2 238 262 263 #if defined( __parallel ) 239 264 IF ( PRESENT( comm_input ) ) THEN 240 265 this%comm_model = comm_input … … 248 273 IF ( this%no_shared_memory_in_this_run ) THEN 249 274 this%iam_io_pe = .TRUE. 275 this%sh_rank = 0 276 this%sh_npes = 1 250 277 RETURN 251 278 ENDIF 252 279 253 #if defined( __parallel ) 254 ! 255 !-- Determine, how many MPI threads are running on a node 280 ! 281 !-- Determine, how many PEs are running on a node. 256 282 this%iam_io_pe = .FALSE. 
257 283 CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, & … … 260 286 CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr ) 261 287 262 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr ) 288 CALL MPI_ALLREDUCE( this%n_npes, max_npes_per_node, 1, MPI_INTEGER, MPI_MAX, this%comm_model, & 289 ierr ) 263 290 ! 264 291 !-- Decide, if the configuration can run with shared-memory IO 265 IF ( max_n _npes> 64 ) THEN292 IF ( max_npes_per_node > 64 ) THEN 266 293 ! 267 294 !-- Special configuration on the HLRN-IV system with 4 shared memory blocks/node 268 295 this%nr_io_pe_per_node = 4 269 296 270 ELSEIF ( max_n _npes <= 32) THEN271 ! 272 !-- No shared memory IO with less than 3 2 threads/node297 ELSEIF ( max_npes_per_node <= 3 ) THEN 298 ! 299 !-- No shared memory IO with less than 3 MPI tasks/node 273 300 this%no_shared_memory_in_this_run = .TRUE. 274 301 this%iam_io_pe = .TRUE. … … 277 304 278 305 ! 279 !-- No shared memory IO with small setups 280 IF ( nx < 24 .OR. ny < 24) THEN306 !-- No shared memory IO with small setups. 307 IF ( nx < 16 .OR. ny < 16 ) THEN 281 308 this%no_shared_memory_in_this_run = .TRUE. 282 309 this%iam_io_pe = .TRUE. … … 299 326 ! 300 327 !-- Setup the communicator across the nodes depending on the shared memory rank. 301 !-- All threads with shared memory rank 0 will be I/O threads.328 !-- All PEs with shared memory rank 0 will be I/O PEs. 302 329 color = this%sh_rank 303 330 CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr ) … … 316 343 ENDIF 317 344 CALL MPI_BCAST( this%io_pe_global_rank, 1, MPI_INTEGER, 0, this%comm_shared, ierr ) 318 319 345 #else 320 this%iam_io_pe = .TRUE. 346 this%iam_io_pe = .TRUE. 347 this%comm_model = comm2d 348 this%sh_rank = 0 349 this%sh_npes = 1 350 this%no_shared_memory_in_this_run = .TRUE. 321 351 #endif 322 323 ! write(9,'(a,8i7)') ' end of sm_init_comm ',this%sh_rank,this%sh_npes,this%io_rank,this%io_npes,this%io_pe_global_rank324 ! write(9,*) 'This process is IO Process ',this%iam_io_pe325 352 326 353 #if defined( __parallel ) … … 362 389 CALL MPI_ALLREDUCE( local_dim_s, local_dim_r, SIZE( local_dim_s ), MPI_INTEGER, MPI_SUM, & 363 390 this%comm_node, ierr ) 364 sh_group_size = ( max_n _npes+ this%nr_io_pe_per_node - 1 ) / this%nr_io_pe_per_node391 sh_group_size = ( max_npes_per_node + this%nr_io_pe_per_node - 1 ) / this%nr_io_pe_per_node 365 392 366 393 pe = 0 … … 417 444 END SUBROUTINE sm_init_comm 418 445 419 420 446 ! 421 447 !-- Initializing setup for output of particle time series. … … 428 454 429 455 #if defined( __parallel ) 430 INTEGER(iwp) :: color !<431 INTEGER(iwp) :: ierr !<432 INTEGER(iwp) :: max_n _npes!< maximum number of PEs/node456 INTEGER(iwp) :: color !< 457 INTEGER(iwp) :: ierr !< 458 INTEGER(iwp) :: max_npes_per_node !< maximum number of PEs/node 433 459 #endif 434 460 … … 451 477 #if defined( __parallel ) 452 478 ! 453 !-- Determine, how many MPI threads are running on a node479 !-- Determine, how many PEs are running on a node. 454 480 this%iam_io_pe = .FALSE. 455 481 CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, & … … 458 484 CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr ) 459 485 460 CALL MPI_ALLREDUCE( this%n_npes, max_n _npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr )461 486 CALL MPI_ALLREDUCE( this%n_npes, max_npes_per_node, 1, MPI_INTEGER, MPI_MAX, this%comm_model, & 487 ierr ) 462 488 ! 
463 489 !-- TODO: better explanation … … 465 491 !-- even better to use the complete node for MPI shared memory (this%nr_io_pe_per_node = 1). 466 492 !- In the latter case, the access to the MPI shared memory buffer is slower, the number of 467 !-- particles to move between threads will be much smaller.468 IF ( max_n _npes> 64 ) THEN493 !-- particles to move between PEs will be much smaller. 494 IF ( max_npes_per_node > 64 ) THEN 469 495 ! 470 496 !-- Special configuration on the HLRN-IV system with 4 shared memory blocks/node … … 526 552 ! 527 553 !-- Setup the communicator across the nodes depending on the shared memory rank. 528 !-- All threads with shared memory rank 0 will be I/O threads.554 !-- All PEs with shared memory rank 0 will be I/O PEs. 529 555 color = this%sh_rank 530 556 CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr ) … … 573 599 ! Description: 574 600 ! ------------ 575 !> Allocate shared 1d-REAL (64 Bit) array on ALL threads601 !> Allocate shared 1d-REAL (64 bit) array on PE 0 and pass address to all PEs. 576 602 !--------------------------------------------------------------------------------------------------! 577 603 SUBROUTINE sm_allocate_shared_1d_64( this, p1, d1, d2, win ) … … 590 616 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 591 617 592 INTEGER , DIMENSION(1):: buf_shape618 INTEGER(iwp), DIMENSION(1) :: buf_shape 593 619 594 620 REAL(dp), DIMENSION(:), POINTER :: buf … … 601 627 IF ( this%no_shared_memory_in_this_run ) RETURN 602 628 ! 603 !-- Allocate shared memory on node rank 0 threads.629 !-- Allocate shared memory on node rank 0 PEs. 604 630 IF ( this%sh_rank == pe_from ) THEN 605 631 wsize = d2 - d1 + 1 … … 629 655 ! Description: 630 656 ! ------------ 631 !> Allocate shared 1d-REAL (32 Bit) array on ALL threads657 !> Allocate shared 1d-REAL (32 bit) array on PE 0 and pass address to all PEs 632 658 !--------------------------------------------------------------------------------------------------! 633 659 SUBROUTINE sm_allocate_shared_1d_32( this, p1, d1, d2, win ) … … 646 672 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 647 673 648 INTEGER , DIMENSION(1):: buf_shape674 INTEGER(iwp), DIMENSION(1) :: buf_shape 649 675 650 676 REAL(sp), DIMENSION(:), POINTER :: buf … … 657 683 IF ( this%no_shared_memory_in_this_run ) RETURN 658 684 ! 659 !-- Allocate shared memory on node rank 0 threads.685 !-- Allocate shared memory on node rank 0 PEs. 660 686 IF ( this%sh_rank == pe_from ) THEN 661 687 wsize = d2 - d1 + 1 … … 685 711 ! Description: 686 712 ! ------------ 687 !> Allocate shared 1d-INTEGER array on ALL threads713 !> Allocate shared 1d-INTEGER array on PE 0 and pass address to all PEs. 688 714 !--------------------------------------------------------------------------------------------------! 689 715 SUBROUTINE sm_allocate_shared_1di( this, p1, d1, d2, win ) … … 702 728 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 703 729 704 INTEGER , DIMENSION(1):: buf_shape730 INTEGER(iwp), DIMENSION(1) :: buf_shape 705 731 706 732 INTEGER(iwp), DIMENSION(:), POINTER :: buf … … 713 739 IF ( this%no_shared_memory_in_this_run ) RETURN 714 740 ! 715 !-- Allocate shared memory on node rank 0 threads.741 !-- Allocate shared memory on node rank 0 PEs. 716 742 IF ( this%sh_rank == pe_from ) THEN 717 743 wsize = d2 - d1 + 1 … … 741 767 ! Description: 742 768 ! ------------ 743 !> Allocate shared 2d-REAL array on ALL threads (64 Bit)769 !> Allocate shared 2d-REAL array (64 bit) on PE 0 and pass address to all PEs. 
744 770 !--------------------------------------------------------------------------------------------------! 745 771 SUBROUTINE sm_allocate_shared_2d_64( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) … … 771 797 IF ( this%no_shared_memory_in_this_run ) RETURN 772 798 ! 773 !-- Allocate shared memory on node rank 0 threads.799 !-- Allocate shared memory on node rank 0 PEs. 774 800 IF ( this%sh_rank == pe_from ) THEN 775 801 wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 ) … … 801 827 ! Description: 802 828 ! ------------ 803 !> Allocate shared 2d-REAL (32 Bit) array on ALL threads829 !> Allocate shared 2d-REAL (32 Bit) array on PE 0 and pass address to all PEs. 804 830 !--------------------------------------------------------------------------------------------------! 805 831 SUBROUTINE sm_allocate_shared_2d_32( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) … … 831 857 IF ( this%no_shared_memory_in_this_run ) RETURN 832 858 ! 833 !-- Allocate shared memory on node rank 0 threads.859 !-- Allocate shared memory on node rank 0 PEs. 834 860 IF ( this%sh_rank == pe_from ) THEN 835 861 wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 ) … … 861 887 ! Description: 862 888 ! ------------ 863 !> Allocate shared 2d-INTEGER array on ALL threads889 !> Allocate shared 2d-INTEGER array on PE 0 and pass address to all PEs. 864 890 !--------------------------------------------------------------------------------------------------! 865 891 SUBROUTINE sm_allocate_shared_2di( this, p2i, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) … … 891 917 IF ( this%no_shared_memory_in_this_run ) RETURN 892 918 ! 893 !-- Allocate shared memory on node rank 0 threads.919 !-- Allocate shared memory on node rank 0 PEs. 894 920 IF ( this%sh_rank == pe_from ) THEN 895 921 wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 ) … … 921 947 ! Description: 922 948 ! ------------ 923 !> Allocate shared 3d-REAL (64 Bit) array on ALL threads949 !> Allocate shared 3d-REAL (64 bit) array on PE 0 and pass address to all PEs. 924 950 !--------------------------------------------------------------------------------------------------! 925 951 SUBROUTINE sm_allocate_shared_3d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) … … 929 955 CLASS(sm_class), INTENT(inout) :: this !< 930 956 931 INTEGER 932 INTEGER , INTENT(IN):: d1e !<933 INTEGER , INTENT(IN):: d1s !<934 INTEGER , INTENT(IN):: d2e !<935 INTEGER , INTENT(IN):: d2s !<936 INTEGER , INTENT(IN):: d3e !<937 INTEGER , INTENT(IN):: d3s !<938 INTEGER , SAVE:: pe_from = 0 !<939 INTEGER , INTENT(OUT):: win !<957 INTEGER(iwp) :: disp_unit !< 958 INTEGER(iwp), INTENT(IN) :: d1e !< 959 INTEGER(iwp), INTENT(IN) :: d1s !< 960 INTEGER(iwp), INTENT(IN) :: d2e !< 961 INTEGER(iwp), INTENT(IN) :: d2s !< 962 INTEGER(iwp), INTENT(IN) :: d3e !< 963 INTEGER(iwp), INTENT(IN) :: d3s !< 964 INTEGER(iwp), SAVE :: pe_from = 0 !< 965 INTEGER(iwp), INTENT(OUT) :: win !< 940 966 941 967 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 942 968 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 943 969 944 INTEGER , DIMENSION(3):: buf_shape !<970 INTEGER(iwp), DIMENSION(3) :: buf_shape !< 945 971 946 972 REAL(dp), DIMENSION(:,:,:), POINTER :: buf !< … … 953 979 IF ( this%no_shared_memory_in_this_run ) RETURN 954 980 ! 955 !-- Allocate shared memory on node rank 0 threads.981 !-- Allocate shared memory on node rank 0 PEs. 956 982 IF ( this%sh_rank == pe_from ) THEN 957 983 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) … … 984 1010 ! Description: 985 1011 ! 
------------ 986 !> Allocate shared 3d-REAL (32 Bit) array on ALL threads1012 !> Allocate shared 3d-REAL (32 bit) array on PE 0 and pass address to all PEs. 987 1013 !--------------------------------------------------------------------------------------------------! 988 1014 SUBROUTINE sm_allocate_shared_3d_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) … … 992 1018 CLASS(sm_class), INTENT(inout) :: this 993 1019 994 INTEGER 995 INTEGER , INTENT(IN):: d1e996 INTEGER , INTENT(IN):: d1s997 INTEGER , INTENT(IN):: d2e998 INTEGER , INTENT(IN):: d2s999 INTEGER , INTENT(IN):: d3e1000 INTEGER , INTENT(IN):: d3s1001 INTEGER , SAVE:: pe_from = 01002 INTEGER , INTENT(OUT):: win1020 INTEGER(iwp) :: disp_unit 1021 INTEGER(iwp), INTENT(IN) :: d1e 1022 INTEGER(iwp), INTENT(IN) :: d1s 1023 INTEGER(iwp), INTENT(IN) :: d2e 1024 INTEGER(iwp), INTENT(IN) :: d2s 1025 INTEGER(iwp), INTENT(IN) :: d3e 1026 INTEGER(iwp), INTENT(IN) :: d3s 1027 INTEGER(iwp), SAVE :: pe_from = 0 1028 INTEGER(iwp), INTENT(OUT) :: win 1003 1029 1004 1030 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 1005 1031 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 1006 1032 1007 INTEGER , DIMENSION(3):: buf_shape1033 INTEGER(iwp), DIMENSION(3) :: buf_shape 1008 1034 1009 1035 REAL(sp), DIMENSION(:,:,:), POINTER :: buf … … 1016 1042 IF ( this%no_shared_memory_in_this_run ) RETURN 1017 1043 ! 1018 !-- Allocate shared memory on node rank 0 threads.1044 !-- Allocate shared memory on node rank 0 PEs. 1019 1045 IF ( this%sh_rank == pe_from ) THEN 1020 1046 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) … … 1047 1073 ! Description: 1048 1074 ! ------------ 1049 !> Allocate shared 3d-REAL (32 bit) array on ALL threads 1075 !> Allocate shared 4d-REAL (64 bit) array on PE 0 and pass address to all PEs. 1076 !--------------------------------------------------------------------------------------------------! 1077 SUBROUTINE sm_allocate_shared_4d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, d4s, d4e, win ) 1078 1079 IMPLICIT NONE 1080 1081 CLASS(sm_class), INTENT(inout) :: this !< 1082 1083 INTEGER :: disp_unit !< 1084 INTEGER(iwp), INTENT(IN) :: d1e !< 1085 INTEGER(iwp), INTENT(IN) :: d1s !< 1086 INTEGER(iwp), INTENT(IN) :: d2e !< 1087 INTEGER(iwp), INTENT(IN) :: d2s !< 1088 INTEGER(iwp), INTENT(IN) :: d3e !< 1089 INTEGER(iwp), INTENT(IN) :: d3s !< 1090 INTEGER(iwp), INTENT(IN) :: d4e !< 1091 INTEGER(iwp), INTENT(IN) :: d4s !< 1092 INTEGER(iwp), SAVE :: pe_from = 0 !< 1093 INTEGER(iwp), INTENT(OUT) :: win !< 1094 1095 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 1096 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 1097 1098 INTEGER(iwp), DIMENSION(4) :: buf_shape !< 1099 1100 REAL(dp), DIMENSION(:,:,:,:), POINTER :: buf !< 1101 REAL(dp), DIMENSION(:,:,:,:), POINTER :: p3 !< 1102 1103 TYPE(C_PTR), SAVE :: base_ptr !< 1104 TYPE(C_PTR), SAVE :: rem_ptr !< 1105 1106 1107 IF ( this%no_shared_memory_in_this_run ) RETURN 1108 ! 1109 !-- Allocate shared memory on node rank 0 PEs. 1110 IF ( this%sh_rank == pe_from ) THEN 1111 wsize = (d4e - d4s +1) * ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) 1112 ELSE 1113 wsize = 1 1114 ENDIF 1115 1116 wsize = wsize * dp ! Please note, size is always in bytes, independently of the displacement 1117 ! unit 1118 1119 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 1120 ! 1121 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 1122 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 1123 ! 
1124 !-- Convert C- to Fortran-pointer 1125 buf_shape(4) = d4e - d4s + 1 1126 buf_shape(3) = d3e - d3s + 1 1127 buf_shape(2) = d2e - d2s + 1 1128 buf_shape(1) = d1e - d1s + 1 1129 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 1130 p3(d1s:,d2s:,d3s:,d4s:) => buf 1131 ! 1132 !-- Allocate shared memory in round robin on all PEs of a node. 1133 pe_from = MOD( pe_from, this%sh_npes ) 1134 1135 END SUBROUTINE sm_allocate_shared_4d_64 1136 1137 1138 !--------------------------------------------------------------------------------------------------! 1139 ! Description: 1140 ! ------------ 1141 !> Allocate shared 4d-REAL (32 bit) array on PE 0 and pass address to all PEs. 1142 !--------------------------------------------------------------------------------------------------! 1143 SUBROUTINE sm_allocate_shared_4d_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, d4s, d4e, win ) 1144 1145 IMPLICIT NONE 1146 1147 CLASS(sm_class), INTENT(inout) :: this !< 1148 1149 INTEGER :: disp_unit !< 1150 INTEGER(iwp), INTENT(IN) :: d1e !< 1151 INTEGER(iwp), INTENT(IN) :: d1s !< 1152 INTEGER(iwp), INTENT(IN) :: d2e !< 1153 INTEGER(iwp), INTENT(IN) :: d2s !< 1154 INTEGER(iwp), INTENT(IN) :: d3e !< 1155 INTEGER(iwp), INTENT(IN) :: d3s !< 1156 INTEGER(iwp), INTENT(IN) :: d4e !< 1157 INTEGER(iwp), INTENT(IN) :: d4s !< 1158 INTEGER(iwp), SAVE :: pe_from = 0 !< 1159 INTEGER(iwp), INTENT(OUT) :: win !< 1160 1161 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 1162 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 1163 1164 INTEGER(iwp), DIMENSION(4) :: buf_shape !< 1165 1166 REAL(sp), DIMENSION(:,:,:,:), POINTER :: buf !< 1167 REAL(sp), DIMENSION(:,:,:,:), POINTER :: p3 !< 1168 1169 TYPE(C_PTR), SAVE :: base_ptr !< 1170 TYPE(C_PTR), SAVE :: rem_ptr !< 1171 1172 1173 IF ( this%no_shared_memory_in_this_run ) RETURN 1174 ! 1175 !-- Allocate shared memory on node rank 0 PEs. 1176 IF ( this%sh_rank == pe_from ) THEN 1177 wsize = (d4e - d4s +1) * ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) 1178 ELSE 1179 wsize = 1 1180 ENDIF 1181 1182 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 1183 ! unit 1184 1185 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 1186 ! 1187 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 1188 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 1189 ! 1190 !-- Convert C- to Fortran-pointer 1191 buf_shape(4) = d4e - d4s + 1 1192 buf_shape(3) = d3e - d3s + 1 1193 buf_shape(2) = d2e - d2s + 1 1194 buf_shape(1) = d1e - d1s + 1 1195 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 1196 p3(d1s:,d2s:,d3s:,d4s:) => buf 1197 ! 1198 !-- Allocate shared memory in round robin on all PEs of a node. 1199 pe_from = MOD( pe_from, this%sh_npes ) 1200 1201 END SUBROUTINE sm_allocate_shared_4d_32 1202 1203 1204 !--------------------------------------------------------------------------------------------------! 1205 ! Description: 1206 ! ------------ 1207 !> Allocate shared 3d-INTEGER (32 bit) array on PE 0 and pass address to all PEs. 1050 1208 !--------------------------------------------------------------------------------------------------! 
1051 1209 SUBROUTINE sm_allocate_shared_3di_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) … … 1053 1211 IMPLICIT NONE 1054 1212 1055 CLASS(sm_class), INTENT(inout) :: this1056 1057 INTEGER :: disp_unit1058 INTEGER , INTENT(IN):: d1e1059 INTEGER , INTENT(IN):: d1s1060 INTEGER , INTENT(IN):: d2e1061 INTEGER , INTENT(IN):: d2s1062 INTEGER , INTENT(IN):: d3e1063 INTEGER , INTENT(IN):: d3s1064 INTEGER , SAVE:: pe_from = 01065 INTEGER , INTENT(OUT):: win1066 1067 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size1068 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize1069 1070 INTEGER , DIMENSION(3):: buf_shape1213 CLASS(sm_class), INTENT(inout) :: this 1214 1215 INTEGER :: disp_unit 1216 INTEGER(iwp), INTENT(IN) :: d1e 1217 INTEGER(iwp), INTENT(IN) :: d1s 1218 INTEGER(iwp), INTENT(IN) :: d2e 1219 INTEGER(iwp), INTENT(IN) :: d2s 1220 INTEGER(iwp), INTENT(IN) :: d3e 1221 INTEGER(iwp), INTENT(IN) :: d3s 1222 INTEGER(iwp), SAVE :: pe_from = 0 1223 INTEGER(iwp), INTENT(OUT) :: win 1224 1225 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 1226 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 1227 1228 INTEGER(iwp), DIMENSION(3) :: buf_shape 1071 1229 1072 1230 INTEGER(isp), DIMENSION(:,:,:), POINTER :: buf 1073 1231 INTEGER(isp), DIMENSION(:,:,:), POINTER :: p3 1074 1232 1075 TYPE(C_PTR), SAVE :: base_ptr1076 TYPE(C_PTR), SAVE :: rem_ptr1077 1078 1079 IF ( this%no_shared_memory_in_this_run ) RETURN 1080 ! 1081 !-- Allocate shared memory on node rank 0 threads.1233 TYPE(C_PTR), SAVE :: base_ptr 1234 TYPE(C_PTR), SAVE :: rem_ptr 1235 1236 1237 IF ( this%no_shared_memory_in_this_run ) RETURN 1238 ! 1239 !-- Allocate shared memory on node rank 0 PEs. 1082 1240 IF ( this%sh_rank == pe_from ) THEN 1083 1241 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) … … 1110 1268 ! Description: 1111 1269 ! ------------ 1112 !> Allocate shared 3d- REAL (64 bit) array on ALL threads1270 !> Allocate shared 3d-INTEGER (64 bit) array on PE 0 and pass address to all PEs. 1113 1271 !--------------------------------------------------------------------------------------------------! 1114 1272 SUBROUTINE sm_allocate_shared_3di_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) … … 1116 1274 IMPLICIT NONE 1117 1275 1118 CLASS(sm_class), INTENT(inout) :: this !<1119 1120 INTEGER :: disp_unit !<1121 INTEGER , INTENT(IN):: d1e !<1122 INTEGER , INTENT(IN):: d1s !<1123 INTEGER , INTENT(IN):: d2e !<1124 INTEGER , INTENT(IN):: d2s !<1125 INTEGER , INTENT(IN):: d3e !<1126 INTEGER , INTENT(IN):: d3s !<1127 INTEGER , SAVE:: pe_from = 0 !<1128 INTEGER , INTENT(OUT):: win !<1129 1130 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !<1131 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !<1132 1133 INTEGER , DIMENSION(3):: buf_shape !<1276 CLASS(sm_class), INTENT(inout) :: this !< 1277 1278 INTEGER :: disp_unit !< 1279 INTEGER(iwp), INTENT(IN) :: d1e !< 1280 INTEGER(iwp), INTENT(IN) :: d1s !< 1281 INTEGER(iwp), INTENT(IN) :: d2e !< 1282 INTEGER(iwp), INTENT(IN) :: d2s !< 1283 INTEGER(iwp), INTENT(IN) :: d3e !< 1284 INTEGER(iwp), INTENT(IN) :: d3s !< 1285 INTEGER(iwp), SAVE :: pe_from = 0 !< 1286 INTEGER(iwp), INTENT(OUT) :: win !< 1287 1288 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 1289 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 1290 1291 INTEGER(iwp), DIMENSION(3) :: buf_shape !< 1134 1292 1135 1293 INTEGER(idp), DIMENSION(:,:,:), POINTER :: buf !< 1136 1294 INTEGER(idp), DIMENSION(:,:,:), POINTER :: p3 !< 1137 1295 1138 TYPE(C_PTR), SAVE :: base_ptr !<1139 TYPE(C_PTR), SAVE :: rem_ptr !<1140 1141 1142 IF ( this%no_shared_memory_in_this_run ) RETURN 1143 ! 
1144 !-- Allocate shared memory on node rank 0 threads.1296 TYPE(C_PTR), SAVE :: base_ptr !< 1297 TYPE(C_PTR), SAVE :: rem_ptr !< 1298 1299 1300 IF ( this%no_shared_memory_in_this_run ) RETURN 1301 ! 1302 !-- Allocate shared memory on node rank 0 PEs. 1145 1303 IF ( this%sh_rank == pe_from ) THEN 1146 1304 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) … … 1169 1327 END SUBROUTINE sm_allocate_shared_3di_64 1170 1328 1329 1330 !--------------------------------------------------------------------------------------------------! 1331 ! Description: 1332 ! ------------ 1333 !> Allocate shared 3d-REAL (64 Bit) array on ALL PEs. 1334 !> 1335 !> Every PE allocates the local part of a node-shared array. 1336 !> The C-Pointer of this array and the local limits are broadcasted to all PEs of the node 1337 !> The information is store in an array of type sm_remote_array and can be retrieved 1338 !> by sm_remote_array to access remote data. 1339 !--------------------------------------------------------------------------------------------------! 1340 SUBROUTINE sm_all_allocate_shared_3d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, remote_arrays, win ) 1341 1342 IMPLICIT NONE 1343 1344 CLASS(sm_class), INTENT(inout) :: this !< class pointer 1345 REAL(dp), DIMENSION(:,:,:), POINTER :: p3 !< return local array pointer 1346 1347 INTEGER(iwp), INTENT(IN) :: d1e !< end index dimension 1 1348 INTEGER(iwp), INTENT(IN) :: d1s !< start index dimension 1 1349 INTEGER(iwp), INTENT(IN) :: d2e !< end index dimension 2 1350 INTEGER(iwp), INTENT(IN) :: d2s !< start index dimension 2 1351 INTEGER(iwp), INTENT(IN) :: d3e !< end index dimension 3 1352 INTEGER(iwp), INTENT(IN) :: d3s !< start index dimension 3 1353 INTEGER(iwp), INTENT(OUT) :: win !< MPI Window 1354 1355 INTEGER(iwp), DIMENSION(3) :: buf_shape !< 1356 INTEGER(iwp) :: disp_unit !< 1357 INTEGER(iwp) :: i !< 1358 INTEGER(iwp), SAVE :: pe_from = 0 !< 1359 1360 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 1361 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 1362 1363 REAL(dp), DIMENSION(:,:,:), POINTER :: buf !< 1364 1365 TYPE(sm_remote_array),INTENT(INOUT), DIMENSION(0:this%sh_npes-1) :: remote_arrays !< info about all remote arrays 1366 1367 TYPE(C_PTR), SAVE :: base_ptr !< 1368 1369 INTEGER(iwp),DIMENSION(6,0:this%sh_npes-1) :: all_indices_s 1370 INTEGER(iwp),DIMENSION(6,0:this%sh_npes-1) :: all_indices 1371 1372 1373 IF ( this%no_shared_memory_in_this_run ) RETURN 1374 1375 all_indices_s = 0 1376 1377 1378 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) 1379 1380 wsize = wsize * dp ! Please note, size is always in bytes, independently of the displacement unit 1381 1382 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 1383 ! 
1384 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 1385 1386 all_indices_s(1,this%sh_rank) = d1s 1387 all_indices_s(2,this%sh_rank) = d1e 1388 all_indices_s(3,this%sh_rank) = d2s 1389 all_indices_s(4,this%sh_rank) = d2e 1390 all_indices_s(5,this%sh_rank) = d3s 1391 all_indices_s(6,this%sh_rank) = d3e 1392 1393 CALL MPI_ALLREDUCE (all_indices_s ,all_indices, SIZE(all_indices_s), MPI_INTEGER, MPI_SUM, this%comm_shared, ierr) 1394 1395 DO i=0,this%sh_npes-1 1396 CALL MPI_WIN_SHARED_QUERY( win, i, rem_size, disp_unit, remote_arrays(i)%rem_ptr, ierr ) 1397 remote_arrays(i)%d1s = all_indices(1,i) 1398 remote_arrays(i)%d1e = all_indices(2,i) 1399 remote_arrays(i)%d2s = all_indices(3,i) 1400 remote_arrays(i)%d2e = all_indices(4,i) 1401 remote_arrays(i)%d3s = all_indices(5,i) 1402 remote_arrays(i)%d3e = all_indices(6,i) 1403 END DO 1404 1405 ! 1406 !-- Convert C- to Fortran-pointer 1407 buf_shape(3) = d3e - d3s + 1 1408 buf_shape(2) = d2e - d2s + 1 1409 buf_shape(1) = d1e - d1s + 1 1410 CALL C_F_POINTER( remote_arrays(this%sh_rank)%rem_ptr, buf, buf_shape ) 1411 p3(d1s:,d2s:,d3s:) => buf 1412 ! 1413 !-- Allocate shared memory in round robin on all PEs of a node. 1414 pe_from = MOD( pe_from, this%sh_npes ) 1415 1416 END SUBROUTINE sm_all_allocate_shared_3d_64 1171 1417 #endif 1172 1418 … … 1243 1489 !> ... 1244 1490 !--------------------------------------------------------------------------------------------------! 1245 SUBROUTINE sm_node_barrier( this ) 1246 1247 IMPLICIT NONE 1491 SUBROUTINE sm_node_barrier( this, win ) 1492 1493 IMPLICIT NONE 1494 1495 INTEGER(iwp), OPTIONAL :: win !< 1248 1496 1249 1497 CLASS(sm_class), INTENT(inout) :: this !< … … 1254 1502 #if defined( __parallel ) 1255 1503 CALL MPI_BARRIER( this%comm_shared, ierr ) 1504 IF ( PRESENT(win) ) THEN 1505 CALL MPI_WIN_FENCE(0, win, ierr ) 1506 ENDIF 1256 1507 #endif 1257 1508 -
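All sm_allocate_shared_* variants above, including the new 4d routines and sm_all_allocate_shared_3d_64, follow the same MPI-3 pattern: one PE of the node communicator allocates the window, every PE queries the base address and attaches a Fortran pointer to it. The following self-contained sketch shows only that pattern; it mirrors the calls used in the module, but it assumes an MPI library that provides the TYPE(C_PTR) interfaces of MPI_WIN_ALLOCATE_SHARED and MPI_WIN_SHARED_QUERY in the mpi module and that a REAL of kind 8 occupies 8 bytes:

   PROGRAM shared_window_sketch

      USE ISO_C_BINDING,  ONLY:  C_F_POINTER, C_PTR
      USE MPI

      IMPLICIT NONE

      INTEGER, PARAMETER ::  dp = 8   !< kind of a double precision REAL, assumed to equal its size in bytes
      INTEGER, PARAMETER ::  n  = 10  !< number of shared elements (illustration only)

      INTEGER ::  comm_node  !< node-local (shared-memory capable) communicator
      INTEGER ::  disp_unit  !< displacement unit returned by MPI_WIN_SHARED_QUERY
      INTEGER ::  ierr       !< MPI error code
      INTEGER ::  node_rank  !< rank within comm_node
      INTEGER ::  win        !< shared-memory window handle

      INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size  !< size of the queried segment
      INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize     !< local window size in bytes

      REAL(dp), DIMENSION(:), POINTER ::  buf  !< Fortran view of the node-shared buffer

      TYPE(C_PTR) ::  base_ptr  !< C address of the locally allocated segment
      TYPE(C_PTR) ::  rem_ptr   !< C address of node rank 0's segment


      CALL MPI_INIT( ierr )
   !
   !-- All ranks that can share memory end up in the same comm_node.
      CALL MPI_COMM_SPLIT_TYPE( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,        &
                                comm_node, ierr )
      CALL MPI_COMM_RANK( comm_node, node_rank, ierr )
   !
   !-- Only node rank 0 provides memory; the other ranks allocate a zero-size segment.
      wsize = 0_MPI_ADDRESS_KIND
      IF ( node_rank == 0 )  wsize = INT( n, MPI_ADDRESS_KIND ) * dp
      CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, comm_node, base_ptr, win, ierr )
   !
   !-- Map node rank 0's segment into the local address space and attach a Fortran pointer.
      CALL MPI_WIN_SHARED_QUERY( win, 0, rem_size, disp_unit, rem_ptr, ierr )
      CALL C_F_POINTER( rem_ptr, buf, (/ n /) )

      CALL MPI_WIN_FENCE( 0, win, ierr )
      IF ( node_rank == 0 )  buf = 42.0_dp
      CALL MPI_WIN_FENCE( 0, win, ierr )
      PRINT '(A,I3,A,F6.1)', 'node rank ', node_rank, ' sees buf(1) = ', buf(1)

      CALL MPI_WIN_FREE( win, ierr )
      CALL MPI_FINALIZE( ierr )

   END PROGRAM shared_window_sketch

Run with several ranks on one node, every rank should print the value stored by node rank 0, which is the property the shared-memory I/O buffers of this module rely on.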
palm/trunk/SOURCE/surface_data_output_mod.f90
r4892 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance 28 ! 29 ! 4892 2021-03-02 11:53:58Z suehring 27 30 ! Remove outdated error message. 28 31 ! … … 4578 4581 4579 4582 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: end_index !< end index of surface data at (j,i) 4580 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: global_start_index !< index array for surface data (MPI-IO) 4583 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: global_end_index !< end index array for surface data (MPI-IO) 4584 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: global_start_index !< start index array for surface data (MPI-IO) 4581 4585 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: num_surf !< number of surface data at (j,i) 4582 4586 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: start_index !< start index of surface data at (j,i) … … 4600 4604 ALLOCATE( surf_in(1:surfaces%ns) ) 4601 4605 4602 CALL rd_mpi_io_check_array( 'surfaces%start_index', found = array_found )4603 IF ( array_found ) CALL rrd_mpi_io( 'surfaces%start_index', start_index )4604 4605 CALL rd_mpi_io_check_array( 'surfaces%end_index', found = array_found )4606 IF ( array_found ) CALL rrd_mpi_io( 'surfaces%end_index', end_index )4607 4608 4606 CALL rd_mpi_io_check_array( 'surfaces%global_start_index', found = array_found ) 4609 4607 IF ( array_found ) CALL rrd_mpi_io( 'surfaces%global_start_index', global_start_index ) 4610 4608 4611 CALL rd_mpi_io_surface_filetypes( start_index, end_index, ldum, global_start_index ) 4609 CALL rd_mpi_io_check_array( 'surfaces%global_end_index', found = array_found ) 4610 IF ( array_found ) CALL rrd_mpi_io( 'surfaces%global_end_index', global_end_index ) 4611 4612 CALL rd_mpi_io_surface_filetypes( start_index, end_index, ldum, global_start_index, & 4613 global_end_index ) 4612 4614 4613 4615 DO nv = 1, dosurf_no(1) … … 4728 4730 4729 4731 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: end_index !< end index of surface data at (j,i) 4730 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: global_start_index !< index array for surface data (MPI-IO) 4732 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: global_end_index !< end index array for surface data (MPI-IO) 4733 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: global_start_index !< start index array for surface data (MPI-IO) 4731 4734 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: num_surf !< number of surface data at (j,i) 4732 4735 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: start_index !< start index of surface data at (j,i) … … 4811 4814 4812 4815 CALL rd_mpi_io_surface_filetypes( start_index, end_index, surface_data_to_write, & 4813 global_start_index ) 4814 CALL wrd_mpi_io( 'surfaces%start_index', start_index ) 4815 CALL wrd_mpi_io( 'surfaces%end_index', end_index ) 4816 global_start_index, global_end_index ) 4817 4816 4818 CALL wrd_mpi_io( 'surfaces%global_start_index', global_start_index ) 4819 CALL wrd_mpi_io( 'surfaces%global_end_index', global_end_index ) 4817 4820 4818 4821 DO nv = 1, dosurf_no(1) -
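The module above (like land_surface_model_mod and urban_surface_mod) no longer writes the PE-local start/end index arrays; only global_start_index and global_end_index go into the restart file. That is sufficient because the local indices can be rebuilt from the per-gridbox element counts on read, as done in the read branch of rd_mpi_io_surface_filetypes further above. The following standalone sketch repeats that arithmetic for four grid boxes with made-up counts (including the one dummy element per box introduced by the changeset); it is an illustration only, not PALM code:

   PROGRAM local_index_recovery_sketch

      IMPLICIT NONE

      INTEGER, PARAMETER ::  nbox = 4  !< grid boxes handled by this PE (illustration only)

      INTEGER ::  k         !< running index over grid boxes
      INTEGER ::  lo_start  !< next free position in the local surface-data array

      INTEGER, DIMENSION(nbox) ::  global_start = (/ 1, 4, 5, 7 /)   !< first element of each box (incl. dummy)
      INTEGER, DIMENSION(nbox) ::  global_end   = (/ 3, 4, 6, 10 /)  !< last element of each box (incl. dummy)
      INTEGER, DIMENSION(nbox) ::  end_index    !< recovered local end index
      INTEGER, DIMENSION(nbox) ::  start_index  !< recovered local start index


      lo_start = 1
      DO  k = 1, nbox
   !
   !--    The "- 1" removes the dummy element that was added to every grid box on write.
         start_index(k) = lo_start
         end_index(k)   = lo_start + global_end(k) - global_start(k) - 1
         lo_start       = lo_start + global_end(k) - global_start(k)
         PRINT '(A,I2,A,I3,A,I3)', 'box ', k, ':  start = ', start_index(k),                   &
                                   ',  end = ', end_index(k)
      ENDDO

   END PROGRAM local_index_recovery_sketch

A box without real surface elements comes out with end_index < start_index, so the usual start/end loops over surface elements are simply not entered for it.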
palm/trunk/SOURCE/surface_mod.f90
r4882 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance 28 ! 29 ! 4882 2021-02-19 22:49:44Z forkel 27 30 ! removed lsp in subroutine nitialize_top 28 !29 31 ! 30 32 ! 4881 2021-02-19 22:05:08Z forkel 31 33 ! removed constant_top_csflux option 32 !33 34 ! 34 35 ! 4877 2021-02-17 16:17:35Z suehring … … 1484 1485 !> Allocating memory for upward and downward-facing horizontal surface types, except for top fluxes. 1485 1486 !--------------------------------------------------------------------------------------------------! 1486 SUBROUTINE allocate_surface_attributes_h( surfaces, nys_l, nyn_l, nxl_l, nxr_l ) 1487 SUBROUTINE allocate_surface_attributes_h( surfaces, nys_l, nyn_l, nxl_l, nxr_l, & 1488 no_allocate_index_arrays ) 1487 1489 1488 1490 IMPLICIT NONE … … 1493 1495 INTEGER(iwp) :: nxr_l !< east bound of local 2d array start/end_index, is equal to nyn, except for restart-array 1494 1496 1497 LOGICAL :: allocate_index_arrays 1498 LOGICAL, INTENT(IN), OPTIONAL :: no_allocate_index_arrays 1499 1495 1500 TYPE(surf_type) :: surfaces !< respective surface type 1496 1501 1502 1503 IF ( PRESENT( no_allocate_index_arrays ) ) THEN 1504 allocate_index_arrays = .NOT. no_allocate_index_arrays 1505 ELSE 1506 allocate_index_arrays = .TRUE. 1507 ENDIF 1497 1508 ! 1498 1509 !-- Allocate arrays for start and end index of horizontal surface type for each (j,i)-grid point. 1499 !-- This is required e.g. in diff ion_x, which is called for each (j,i). In order to find the1510 !-- This is required e.g. in diffusion_x, which is called for each (j,i). In order to find the 1500 1511 !-- location where the respective flux is store within the surface-type, start- and end-index are 1501 1512 !-- stored for each (j,i). For example, each (j,i) can have several entries where fluxes for … … 1503 1514 !-- surfaces might exist for given (j,i). If no surface of respective type exist at current (j,i), 1504 1515 !-- set indicies such that loop in diffusion routines will not be entered. 1505 ALLOCATE ( surfaces%start_index(nys_l:nyn_l,nxl_l:nxr_l) ) 1506 ALLOCATE ( surfaces%end_index(nys_l:nyn_l,nxl_l:nxr_l) ) 1507 surfaces%start_index = 0 1508 surfaces%end_index = -1 1516 IF ( allocate_index_arrays ) THEN 1517 ALLOCATE( surfaces%start_index(nys_l:nyn_l,nxl_l:nxr_l) ) 1518 ALLOCATE( surfaces%end_index(nys_l:nyn_l,nxl_l:nxr_l) ) 1519 surfaces%start_index = 0 1520 surfaces%end_index = -1 1521 ENDIF 1509 1522 ! 1510 1523 !-- Indices to locate surface element … … 2051 2064 !> Allocating memory for vertical surface types. 2052 2065 !--------------------------------------------------------------------------------------------------! 2053 SUBROUTINE allocate_surface_attributes_v( surfaces, nys_l, nyn_l, nxl_l, nxr_l ) 2066 SUBROUTINE allocate_surface_attributes_v( surfaces, nys_l, nyn_l, nxl_l, nxr_l, & 2067 no_allocate_index_arrays ) 2054 2068 2055 2069 IMPLICIT NONE … … 2060 2074 INTEGER(iwp) :: nxr_l !< east bound of local 2d array start/end_index, is equal to nyn, except for restart-array 2061 2075 2076 LOGICAL :: allocate_index_arrays 2077 LOGICAL, INTENT(IN), OPTIONAL :: no_allocate_index_arrays 2078 2062 2079 TYPE(surf_type) :: surfaces !< respective surface type 2063 2080 2081 2082 IF ( PRESENT( no_allocate_index_arrays ) ) THEN 2083 allocate_index_arrays = .NOT. no_allocate_index_arrays 2084 ELSE 2085 allocate_index_arrays = .TRUE. 2086 ENDIF 2087 2064 2088 ! 2065 2089 !-- Allocate arrays for start and end index of vertical surface type for each (j,i)-grid point. 
This 2066 !-- is required in diff ion_x, which is called for each (j,i). In order to find the location where2090 !-- is required in diffusion_x, which is called for each (j,i). In order to find the location where 2067 2091 !-- the respective flux is store within the surface-type, start- and end-index are stored for each 2068 2092 !-- (j,i). For example, each (j,i) can have several entries where fluxes for vertical surfaces might 2069 2093 !-- be stored. In the flat case, where no vertical walls exit, set indicies such that loop in 2070 2094 !-- diffusion routines will not be entered. 2071 ALLOCATE ( surfaces%start_index(nys_l:nyn_l,nxl_l:nxr_l) ) 2072 ALLOCATE ( surfaces%end_index(nys_l:nyn_l,nxl_l:nxr_l) ) 2073 surfaces%start_index = 0 2074 surfaces%end_index = -1 2095 IF ( allocate_index_arrays ) THEN 2096 ALLOCATE( surfaces%start_index(nys_l:nyn_l,nxl_l:nxr_l) ) 2097 ALLOCATE( surfaces%end_index(nys_l:nyn_l,nxl_l:nxr_l) ) 2098 surfaces%start_index = 0 2099 surfaces%end_index = -1 2100 ENDIF 2075 2101 ! 2076 2102 !-- Indices to locate surface element. … … 3141 3167 INTEGER(iwp), DIMENSION(0:3) :: start_index_v !< start index for vertical surface elements on gathered surface array 3142 3168 3143 INTEGER(iwp),DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< index for surface data (MPI-IO) 3169 INTEGER(iwp),DIMENSION(nys:nyn,nxl:nxr) :: global_end_index !< end index for surface data (MPI-IO) 3170 INTEGER(iwp),DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< start index for surface data (MPI-IO) 3144 3171 3145 3172 LOGICAL :: surface_data_to_write !< switch for MPI-I/O if PE has surface data to write … … 3988 4015 3989 4016 CALL rd_mpi_io_surface_filetypes( surf_h(l)%start_index, surf_h(l)%end_index, & 3990 surface_data_to_write, global_start_index ) 4017 surface_data_to_write, global_start_index, & 4018 global_end_index ) 3991 4019 IF ( .NOT. surface_data_to_write ) CYCLE 3992 4020 3993 4021 ns_h_on_file(l) = total_number_of_surface_values 3994 4022 3995 CALL wrd_mpi_io( 'surf_h(' // dum // ')%start_index', surf_h(l)%start_index )3996 CALL wrd_mpi_io( 'surf_h(' // dum // ')%end_index', surf_h(l)%end_index )3997 4023 CALL wrd_mpi_io( 'global_start_index_h_' // dum, global_start_index ) 4024 CALL wrd_mpi_io( 'global_end_index_h_' // dum, global_end_index ) 3998 4025 3999 4026 IF ( ALLOCATED ( surf_h(l)%us ) ) THEN … … 4121 4148 4122 4149 CALL rd_mpi_io_surface_filetypes( surf_v(l)%start_index, surf_v(l)%end_index, & 4123 surface_data_to_write, global_start_index ) 4150 surface_data_to_write, global_start_index, & 4151 global_end_index ) 4152 4153 IF ( .NOT. surface_data_to_write ) CYCLE 4124 4154 4125 4155 ns_v_on_file(l) = total_number_of_surface_values 4126 4156 4127 CALL wrd_mpi_io( 'surf_v(' // dum // ')%start_index', surf_v(l)%start_index )4128 CALL wrd_mpi_io( 'surf_v(' // dum // ')%end_index', surf_v(l)%end_index )4129 4157 CALL wrd_mpi_io( 'global_start_index_v_' // dum, global_start_index ) 4158 CALL wrd_mpi_io( 'global_end_index_v_' // dum, global_end_index ) 4130 4159 4131 4160 IF ( .NOT. 
surface_data_to_write ) CYCLE … … 5381 5410 INTEGER(iwp) :: mm !< loop index for surface types - file array 5382 5411 5383 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< index for surface data (MPI-IO) 5384 5385 LOGICAL :: ldum !< dummy variable 5412 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_end_index !< end index for surface data (MPI-IO) 5413 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< start index for surface data (MPI-IO) 5414 5415 LOGICAL :: data_to_read !< cycle in l loop, if no values to read 5386 5416 LOGICAL :: surf_match_def !< flag indicating that surface element is of default type 5387 5417 LOGICAL :: surf_match_lsm !< flag indicating that surface element is of natural type … … 5401 5431 IF ( ns_h_on_file(l) == 0 ) CYCLE !< No data of this surface type on file 5402 5432 5433 WRITE( dum, '(I1)') l 5434 5403 5435 IF ( ALLOCATED( surf_h(l)%start_index ) ) CALL deallocate_surface_attributes_h( surf_h(l) ) 5404 surf_h(l)%ns = ns_h_on_file(l) 5405 CALL allocate_surface_attributes_h( surf_h(l), nys, nyn, nxl, nxr ) 5406 5407 WRITE( dum, '(I1)') l 5408 5409 CALL rrd_mpi_io( 'surf_h(' // dum // ')%start_index', surf_h(l)%start_index ) 5410 CALL rrd_mpi_io( 'surf_h(' // dum // ')%end_index', surf_h(l)%end_index ) 5436 5437 ALLOCATE( surf_h(l)%start_index(nys:nyn,nxl:nxr) ) 5438 ALLOCATE( surf_h(l)%end_index(nys:nyn,nxl:nxr) ) 5439 surf_h(l)%start_index = 0 5440 surf_h(l)%end_index = -1 5441 5411 5442 CALL rrd_mpi_io( 'global_start_index_h_' // dum , global_start_index ) 5412 5413 CALL rd_mpi_io_surface_filetypes( surf_h(l)%start_index, surf_h(l)%end_index, ldum, & 5414 global_start_index ) 5443 CALL rrd_mpi_io( 'global_end_index_h_' // dum , global_end_index ) 5444 5445 CALL rd_mpi_io_surface_filetypes( surf_h(l)%start_index, surf_h(l)%end_index, data_to_read, & 5446 global_start_index, global_end_index ) 5447 5448 surf_h(l)%ns = MAX( 2, MAXVAL( surf_h(l)%end_index ) ) 5449 5450 CALL allocate_surface_attributes_h( surf_h(l), nys, nyn, nxl, nxr, & 5451 no_allocate_index_arrays = .TRUE. ) 5452 IF ( .NOT. data_to_read ) CYCLE 5415 5453 5416 5454 IF ( ALLOCATED ( surf_h(l)%us ) ) THEN … … 5539 5577 5540 5578 IF ( ALLOCATED( surf_v(l)%start_index ) ) CALL deallocate_surface_attributes_v( surf_v(l) ) 5541 surf_v(l)%ns = ns_v_on_file(l) 5542 CALL allocate_surface_attributes_v( surf_v(l), nys, nyn, nxl, nxr ) 5543 5544 WRITE( dum, '(I1)' ) l 5545 5546 CALL rrd_mpi_io( 'surf_v(' // dum // ')%start_index', surf_v(l)%start_index ) 5547 CALL rrd_mpi_io( 'surf_v(' // dum // ')%end_index', surf_v(l)%end_index ) 5579 5580 ALLOCATE( surf_v(l)%start_index(nys:nyn,nxl:nxr) ) 5581 ALLOCATE( surf_v(l)%end_index(nys:nyn,nxl:nxr) ) 5582 surf_v(l)%start_index = 0 5583 surf_v(l)%end_index = -1 5584 5585 WRITE( dum, '(I1)' ) l 5586 5548 5587 CALL rrd_mpi_io( 'global_start_index_v_' // dum , global_start_index ) 5549 5550 CALL rd_mpi_io_surface_filetypes( surf_v(l)%start_index, surf_v(l)%end_index, ldum, & 5551 global_start_index ) 5588 CALL rrd_mpi_io( 'global_end_index_v_' // dum , global_end_index ) 5589 5590 CALL rd_mpi_io_surface_filetypes( surf_v(l)%start_index, surf_v(l)%end_index, data_to_read, & 5591 global_start_index, global_end_index ) 5592 IF ( .NOT. data_to_read ) CYCLE 5593 5594 surf_v(l)%ns = MAX( 2, MAXVAL( surf_v(l)%end_index ) ) 5595 CALL allocate_surface_attributes_v( surf_v(l), nys, nyn, nxl, nxr, & 5596 no_allocate_index_arrays = .TRUE. ) 5552 5597 5553 5598 IF ( ALLOCATED ( surf_v(l)%us ) ) THEN -
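The new optional argument no_allocate_index_arrays introduced above lets the MPI-IO restart reader allocate and fill the start/end index arrays itself before requesting the remaining surface attributes. The following standalone Fortran sketch is illustrative only: the program, the subroutine name allocate_demo, and the bounds passed to it are invented for this example; only the handling of the optional switch mirrors the pattern used in the changeset.

PROGRAM optional_flag_demo

   IMPLICIT NONE

   INTEGER, ALLOCATABLE :: start_index(:,:)  !< stand-in for surfaces%start_index

!
!-- Restart path: the caller sets up the index arrays itself, so allocation is suppressed.
   CALL allocate_demo( 0, 3, 0, 3, no_allocate_index_arrays = .TRUE. )
!
!-- Default path: the routine allocates and initializes the index arrays as before.
   CALL allocate_demo( 0, 3, 0, 3 )

CONTAINS

   SUBROUTINE allocate_demo( nys_l, nyn_l, nxl_l, nxr_l, no_allocate_index_arrays )

      INTEGER, INTENT(IN)           :: nys_l  !< south bound of local index range
      INTEGER, INTENT(IN)           :: nyn_l  !< north bound of local index range
      INTEGER, INTENT(IN)           :: nxl_l  !< west bound of local index range
      INTEGER, INTENT(IN)           :: nxr_l  !< east bound of local index range

      LOGICAL, INTENT(IN), OPTIONAL :: no_allocate_index_arrays  !< suppress allocation if .TRUE.

      LOGICAL :: allocate_index_arrays  !< local flag derived from the optional argument

!
!--   Absence of the optional argument means "allocate as before".
      IF ( PRESENT( no_allocate_index_arrays ) )  THEN
         allocate_index_arrays = .NOT. no_allocate_index_arrays
      ELSE
         allocate_index_arrays = .TRUE.
      ENDIF

      IF ( allocate_index_arrays  .AND.  .NOT. ALLOCATED( start_index ) )  THEN
         ALLOCATE( start_index(nys_l:nyn_l,nxl_l:nxr_l) )
         start_index = 0
      ENDIF

      PRINT*, 'index arrays allocated: ', ALLOCATED( start_index )

   END SUBROUTINE allocate_demo

END PROGRAM optional_flag_demo

The first call prints F, the second prints T, which is the behaviour the revised allocate_surface_attributes_h/v routines rely on when called from the restart reader.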
palm/trunk/SOURCE/urban_surface_mod.f90
r4872 r4893 27 27 ! ----------------- 28 28 ! $Id$ 29 ! revised output of surface data via MPI-IO for better performance 30 ! 31 ! 4872 2021-02-12 15:49:02Z raasch 29 32 ! internal switch removed from namelist 30 33 ! … … 6608 6611 SUBROUTINE usm_rrd_local_mpi 6609 6612 6610 6611 6613 CHARACTER(LEN=1) :: dum !< dummy string to create input-variable name 6612 6614 6613 6615 INTEGER(iwp) :: l !< loop index for surface types 6614 6616 6615 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start 6616 6617 LOGICAL :: ldum !< dummy variable 6617 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_end_index 6618 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start_index 6619 6620 LOGICAL :: data_to_read !< dummy variable 6621 6618 6622 6619 6623 DO l = 0, 1 … … 6621 6625 WRITE( dum, '(I1)' ) l 6622 6626 6623 CALL rrd_mpi_io( 'usm_start_index_h_' //dum, surf_usm_h(l)%start_index ) 6624 CALL rrd_mpi_io( 'usm_end_index_h_' //dum, surf_usm_h(l)%end_index ) 6625 CALL rrd_mpi_io( 'usm_global_start_h_' //dum, global_start ) 6626 6627 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, ldum, & 6628 global_start ) 6629 6630 IF ( MAXVAL( surf_usm_h(l)%end_index ) <= 0 ) CYCLE 6631 6632 IF ( .NOT. ALLOCATED( t_surf_wall_h_1(l)%val ) ) & 6627 CALL rrd_mpi_io( 'usm_global_start_h_' //dum, global_start_index ) 6628 CALL rrd_mpi_io( 'usm_global_end_h_' //dum, global_end_index ) 6629 6630 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, & 6631 data_to_read, global_start_index, global_end_index ) 6632 IF ( .NOT. data_to_read ) CYCLE 6633 6634 IF ( .NOT. ALLOCATED( t_surf_wall_h_1(l)%val ) ) & 6633 6635 ALLOCATE( t_surf_wall_h_1(l)%val(1:surf_usm_h(l)%ns) ) 6634 6636 CALL rrd_mpi_io_surface( 't_surf_wall_h(' // dum // ')', t_surf_wall_h_1(l)%val ) 6635 6637 6636 IF ( .NOT. ALLOCATED( t_surf_window_h_1(l)%val ) )&6638 IF ( .NOT. ALLOCATED( t_surf_window_h_1(l)%val ) ) & 6637 6639 ALLOCATE( t_surf_window_h_1(l)%val(1:surf_usm_h(l)%ns) ) 6638 6640 CALL rrd_mpi_io_surface( 't_surf_window_h(' // dum // ')', t_surf_window_h_1(l)%val ) 6639 6641 6640 IF ( .NOT. ALLOCATED( t_surf_green_h_1(l)%val ) )&6642 IF ( .NOT. ALLOCATED( t_surf_green_h_1(l)%val ) ) & 6641 6643 ALLOCATE( t_surf_green_h_1(l)%val(1:surf_usm_h(l)%ns) ) 6642 6644 CALL rrd_mpi_io_surface( 't_surf_green_h(' // dum // ')', t_surf_green_h_1(l)%val ) 6643 6645 6644 IF ( .NOT. ALLOCATED( m_liq_usm_h_1(l)%val ) )&6646 IF ( .NOT. ALLOCATED( m_liq_usm_h_1(l)%val ) ) & 6645 6647 ALLOCATE( m_liq_usm_h_1(l)%val(1:surf_usm_h(l)%ns) ) 6646 6648 CALL rrd_mpi_io_surface( 'm_liq_usm_h(' // dum // ')', m_liq_usm_h_1(l)%val ) 6647 6649 6648 6650 IF ( indoor_model ) THEN 6649 IF ( .NOT. ALLOCATED( surf_usm_h(l)%waste_heat ) )&6651 IF ( .NOT. ALLOCATED( surf_usm_h(l)%waste_heat ) ) & 6650 6652 ALLOCATE( surf_usm_h(l)%waste_heat(1:surf_usm_h(l)%ns) ) 6651 6653 CALL rrd_mpi_io_surface( 'waste_heat_h(' // dum // ')', surf_usm_h(l)%waste_heat ) 6652 IF ( .NOT. ALLOCATED( surf_usm_h(l)%t_prev ) )&6654 IF ( .NOT. 
ALLOCATED( surf_usm_h(l)%t_prev ) ) & 6653 6655 ALLOCATE( surf_usm_h(l)%t_prev(1:surf_usm_h(l)%ns) ) 6654 6656 CALL rrd_mpi_io_surface( 't_prev_h(' // dum // ')', surf_usm_h(l)%t_prev ) … … 6660 6662 WRITE( dum, '(I1)' ) l 6661 6663 6662 CALL rrd_mpi_io( 'usm_start_index_v_' //dum, surf_usm_v(l)%start_index ) 6663 CALL rrd_mpi_io( 'usm_end_index_v_' // dum, surf_usm_v(l)%end_index ) 6664 CALL rrd_mpi_io( 'usm_global_start_v_' // dum, global_start ) 6665 6666 CALL rd_mpi_io_surface_filetypes( surf_usm_v(l)%start_index, surf_usm_v(l)%end_index, ldum, & 6667 global_start ) 6668 6669 IF ( MAXVAL( surf_usm_v(l)%end_index ) <= 0 ) CYCLE 6670 6671 IF ( .NOT. ALLOCATED( t_surf_wall_v_1(l)%val ) ) & 6664 CALL rrd_mpi_io( 'usm_global_start_v_' // dum, global_start_index ) 6665 CALL rrd_mpi_io( 'usm_global_end_v_' // dum, global_end_index ) 6666 6667 CALL rd_mpi_io_surface_filetypes( surf_usm_v(l)%start_index, surf_usm_v(l)%end_index, & 6668 data_to_read, global_start_index, global_end_index ) 6669 IF ( .NOT. data_to_read ) CYCLE 6670 6671 IF ( .NOT. ALLOCATED( t_surf_wall_v_1(l)%val ) ) & 6672 6672 ALLOCATE( t_surf_wall_v_1(l)%val(1:surf_usm_v(l)%ns) ) 6673 6673 CALL rrd_mpi_io_surface( 't_surf_wall_v(' // dum // ')', t_surf_wall_v_1(l)%val ) 6674 6674 6675 IF ( .NOT. ALLOCATED( t_surf_window_v_1(l)%val ) )&6675 IF ( .NOT. ALLOCATED( t_surf_window_v_1(l)%val ) ) & 6676 6676 ALLOCATE( t_surf_window_v_1(l)%val(1:surf_usm_v(l)%ns) ) 6677 6677 CALL rrd_mpi_io_surface( 't_surf_window_v(' // dum // ')', t_surf_window_v_1(l)%val ) 6678 6678 6679 IF ( .NOT. ALLOCATED( t_surf_green_v_1(l)%val ) )&6679 IF ( .NOT. ALLOCATED( t_surf_green_v_1(l)%val ) ) & 6680 6680 ALLOCATE( t_surf_green_v_1(l)%val(1:surf_usm_v(l)%ns) ) 6681 6681 CALL rrd_mpi_io_surface( 't_surf_green_v(' // dum // ')', t_surf_green_v_1(l)%val) 6682 6682 6683 6683 IF ( indoor_model ) THEN 6684 IF ( .NOT. ALLOCATED( surf_usm_v(l)%waste_heat ) )&6684 IF ( .NOT. ALLOCATED( surf_usm_v(l)%waste_heat ) ) & 6685 6685 ALLOCATE( surf_usm_v(l)%waste_heat(1:surf_usm_v(l)%ns) ) 6686 6686 CALL rrd_mpi_io_surface( 'waste_heat_v(' // dum // ')', surf_usm_v(l)%waste_heat ) 6687 IF ( .NOT. ALLOCATED( surf_usm_v(l)%t_prev ) )&6687 IF ( .NOT. ALLOCATED( surf_usm_v(l)%t_prev ) ) & 6688 6688 ALLOCATE( surf_usm_v(l)%t_prev(1:surf_usm_v(l)%ns) ) 6689 6689 CALL rrd_mpi_io_surface( 't_prev_v(' // dum // ')', surf_usm_v(l)%t_prev ) … … 6696 6696 WRITE( dum, '(I1)' ) l 6697 6697 6698 CALL rrd_mpi_io( 'usm_start_index_h_2_' //dum, surf_usm_h(l)%start_index ) 6699 CALL rrd_mpi_io( 'usm_end_index_h_2_' //dum, surf_usm_h(l)%end_index ) 6700 CALL rrd_mpi_io( 'usm_global_start_h_2_' //dum, global_start ) 6701 6702 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, ldum, & 6703 global_start ) 6704 6705 IF ( MAXVAL( surf_usm_h(l)%end_index ) <= 0 ) CYCLE 6706 6707 IF ( .NOT. ALLOCATED( t_wall_h_1(l)%val ) ) & 6698 CALL rrd_mpi_io( 'usm_global_start_h_2_' //dum, global_start_index ) 6699 CALL rrd_mpi_io( 'usm_global_end_h_2_' //dum, global_end_index ) 6700 6701 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, & 6702 data_to_read, global_start_index, global_end_index ) 6703 IF ( .NOT. data_to_read ) CYCLE 6704 6705 IF ( .NOT. ALLOCATED( t_wall_h_1(l)%val ) ) & 6708 6706 ALLOCATE( t_wall_h_1(l)%val(nzb_wall:nzt_wall+1,1:surf_usm_h(l)%ns) ) 6709 6707 CALL rrd_mpi_io_surface( 't_wall_h(' // dum // ')', t_wall_h_1(l)%val ) 6710 6708 6711 IF ( .NOT. ALLOCATED( t_window_h_1(l)%val ) )&6709 IF ( .NOT. 
ALLOCATED( t_window_h_1(l)%val ) ) & 6712 6710 ALLOCATE( t_window_h_1(l)%val(nzb_wall:nzt_wall+1,1:surf_usm_h(l)%ns) ) 6713 6711 CALL rrd_mpi_io_surface( 't_window_h(' // dum // ')', t_window_h_1(l)%val ) 6714 6712 6715 IF ( .NOT. ALLOCATED( t_green_h_1(l)%val ) )&6713 IF ( .NOT. ALLOCATED( t_green_h_1(l)%val ) ) & 6716 6714 ALLOCATE( t_green_h_1(l)%val(nzb_wall:nzt_wall+1,1:surf_usm_h(l)%ns) ) 6717 6715 CALL rrd_mpi_io_surface( 't_green_h(' // dum // ')', t_green_h_1(l)%val ) … … 6723 6721 WRITE( dum, '(I1)' ) l 6724 6722 6725 CALL rrd_mpi_io( 'usm_start_index_v_2_' //dum, surf_usm_v(l)%start_index ) 6726 CALL rrd_mpi_io( 'usm_end_index_v_2_' // dum, surf_usm_v(l)%end_index ) 6727 CALL rrd_mpi_io( 'usm_global_start_v_2_' // dum, global_start ) 6728 6729 CALL rd_mpi_io_surface_filetypes( surf_usm_v(l)%start_index, surf_usm_v(l)%end_index, ldum, & 6730 global_start ) 6731 6732 IF ( MAXVAL( surf_usm_v(l)%end_index ) <= 0 ) CYCLE 6733 6734 IF ( .NOT. ALLOCATED( t_wall_v_1(l)%val ) ) & 6723 CALL rrd_mpi_io( 'usm_global_start_v_2_' // dum, global_start_index ) 6724 CALL rrd_mpi_io( 'usm_global_end_v_2_' // dum, global_end_index ) 6725 6726 CALL rd_mpi_io_surface_filetypes( surf_usm_v(l)%start_index, surf_usm_v(l)%end_index, & 6727 data_to_read, global_start_index, global_end_index ) 6728 IF ( .NOT. data_to_read ) CYCLE 6729 6730 IF ( .NOT. ALLOCATED( t_wall_v_1(l)%val ) ) & 6735 6731 ALLOCATE ( t_wall_v_1(l)%val(nzb_wall:nzt_wall+1,1:surf_usm_v(l)%ns) ) 6736 6732 CALL rrd_mpi_io_surface( 't_wall_v(' // dum // ')', t_wall_v_1(l)%val ) 6737 6733 6738 IF ( .NOT. ALLOCATED( t_window_v_1(l)%val ) ) 6734 IF ( .NOT. ALLOCATED( t_window_v_1(l)%val ) ) & 6739 6735 ALLOCATE ( t_window_v_1(l)%val(nzb_wall:nzt_wall+1,1:surf_usm_v(l)%ns) ) 6740 6736 CALL rrd_mpi_io_surface( 't_window_v(' // dum // ')', t_window_v_1(l)%val ) 6741 6737 6742 IF ( .NOT. ALLOCATED( t_green_v_1(l)%val ) ) 6738 IF ( .NOT. ALLOCATED( t_green_v_1(l)%val ) ) & 6743 6739 ALLOCATE ( t_green_v_1(l)%val(nzb_wall:nzt_wall+1,1:surf_usm_v(l)%ns) ) 6744 6740 CALL rrd_mpi_io_surface( 't_green_v(' // dum // ')', t_green_v_1(l)%val ) … … 6747 6743 6748 6744 END SUBROUTINE usm_rrd_local_mpi 6745 6749 6746 6750 6747 !--------------------------------------------------------------------------------------------------! … … 7422 7419 INTEGER(iwp) :: l !< index surface type orientation 7423 7420 7424 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< index for surface data (MPI-IO) 7421 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_end_index !< end index for surface data (MPI-IO) 7422 INTEGER(iwp), DIMENSION(nys:nyn,nxl:nxr) :: global_start_index !< start index for surface data (MPI-IO) 7425 7423 7426 7424 LOGICAL :: surface_data_to_write !< switch for MPI-I/O if PE has surface data to write … … 7548 7546 WRITE( dum, '(I1)') l 7549 7547 7550 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, & 7551 surface_data_to_write, global_start_index ) 7552 7553 CALL wrd_mpi_io( 'usm_start_index_h_' // dum, surf_usm_h(l)%start_index ) 7554 CALL wrd_mpi_io( 'usm_end_index_h_' // dum, surf_usm_h(l)%end_index ) 7548 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, & 7549 surface_data_to_write, global_start_index, & 7550 global_end_index ) 7551 7555 7552 CALL wrd_mpi_io( 'usm_global_start_h_' // dum, global_start_index ) 7553 CALL wrd_mpi_io( 'usm_global_end_h_' // dum, global_end_index ) 7556 7554 7557 7555 IF ( .NOT. 
surface_data_to_write ) CYCLE … … 7574 7572 7575 7573 CALL rd_mpi_io_surface_filetypes( surf_usm_v(l)%start_index, surf_usm_v(l)%end_index, & 7576 surface_data_to_write, global_start_index ) 7577 7578 CALL wrd_mpi_io( 'usm_start_index_v_' // dum, surf_usm_v(l)%start_index ) 7579 CALL wrd_mpi_io( 'usm_end_index_v_' // dum, surf_usm_v(l)%end_index ) 7574 surface_data_to_write, global_start_index, & 7575 global_end_index ) 7576 7580 7577 CALL wrd_mpi_io( 'usm_global_start_v_' // dum, global_start_index ) 7578 CALL wrd_mpi_io( 'usm_global_end_v_' // dum, global_end_index ) 7581 7579 7582 7580 IF ( .NOT. surface_data_to_write ) CYCLE … … 7596 7594 WRITE( dum, '(I1)') l 7597 7595 7598 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, & 7599 surface_data_to_write, global_start_index ) 7600 7601 CALL wrd_mpi_io( 'usm_start_index_h_2_' // dum, surf_usm_h(l)%start_index ) 7602 CALL wrd_mpi_io( 'usm_end_index_h_2_' // dum, surf_usm_h(l)%end_index ) 7596 CALL rd_mpi_io_surface_filetypes( surf_usm_h(l)%start_index, surf_usm_h(l)%end_index, & 7597 surface_data_to_write, global_start_index, & 7598 global_end_index ) 7599 7603 7600 CALL wrd_mpi_io( 'usm_global_start_h_2_' // dum, global_start_index ) 7601 CALL wrd_mpi_io( 'usm_global_end_h_2_' // dum, global_end_index ) 7604 7602 7605 7603 IF ( .NOT. surface_data_to_write ) CYCLE … … 7616 7614 7617 7615 CALL rd_mpi_io_surface_filetypes( surf_usm_v(l)%start_index, surf_usm_v(l)%end_index, & 7618 surface_data_to_write, global_start_index ) 7619 7620 CALL wrd_mpi_io( 'usm_start_index_v_2_' //dum, surf_usm_v(l)%start_index ) 7621 CALL wrd_mpi_io( 'usm_end_index_v_2_' // dum, surf_usm_v(l)%end_index ) 7616 surface_data_to_write, global_start_index, & 7617 global_end_index ) 7618 7622 7619 CALL wrd_mpi_io( 'usm_global_start_v_2_' // dum, global_start_index ) 7620 CALL wrd_mpi_io( 'usm_global_end_v_2_' // dum, global_end_index ) 7623 7621 7624 7622 IF ( .NOT. surface_data_to_write ) CYCLE -
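All read blocks of usm_rrd_local_mpi above now share one pattern: build the orientation-dependent variable name, read only the global start/end index arrays, let rd_mpi_io_surface_filetypes report via data_to_read whether the PE holds surface elements of that orientation, and CYCLE before the per-surface reads otherwise. The standalone sketch below is a simplification with invented stand-ins (stub_filetypes replaces the real MPI-IO query, the element counts are made up); it only illustrates that control flow.

PROGRAM usm_read_pattern_demo

   IMPLICIT NONE

   CHARACTER(LEN=1)  :: dum            !< orientation digit appended to variable names
   INTEGER           :: l              !< loop index over surface orientations
   INTEGER           :: ns             !< number of surface elements of this orientation
   LOGICAL           :: data_to_read   !< switch returned by the (stubbed) filetype query
   REAL, ALLOCATABLE :: val(:)         !< stand-in for e.g. t_surf_wall_h_1(l)%val

   DO  l = 0, 1

      WRITE( dum, '(I1)' )  l

      CALL stub_filetypes( l, ns, data_to_read )
      IF ( .NOT. data_to_read )  CYCLE
!
!--   Only reached when this PE actually holds data of orientation l.
      IF ( .NOT. ALLOCATED( val ) )  ALLOCATE( val(1:ns) )
      PRINT*, 'would read t_surf_wall_h(' // dum // ') with ', ns, ' elements'

   ENDDO

CONTAINS

   SUBROUTINE stub_filetypes( l, ns, data_to_read )
!
!--   Stand-in for rd_mpi_io_surface_filetypes: pretend only orientation 0 holds data.
      INTEGER, INTENT(IN)  :: l
      INTEGER, INTENT(OUT) :: ns
      LOGICAL, INTENT(OUT) :: data_to_read

      ns           = MERGE( 8, 0, l == 0 )
      data_to_read = ( l == 0 )

   END SUBROUTINE stub_filetypes

END PROGRAM usm_read_pattern_demo

Compared with the removed code, the explicit MAXVAL test on the end-index array is no longer needed because the filetype query itself returns the switch used for the CYCLE.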
palm/trunk/SOURCE/write_restart_data_mod.f90
r4848 r4893 24 24 ! ----------------- 25 25 ! $Id$ 26 ! version number update because of revised output of surface data via MPI-IO for better performance 27 ! 28 ! 4848 2021-01-21 15:51:51Z gronemeier 26 29 ! bugfix: removed syn_turb_gen from restart files 27 30 ! … … 201 204 INTEGER :: i !< loop index 202 205 203 binary_version_global = '5. 2'206 binary_version_global = '5.3' 204 207 205 208 IF ( restart_data_format_output == 'fortran_binary' ) THEN
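binary_version_global is raised from '5.2' to '5.3' because the surface-data layout of the restart file changes with this revision. The sketch below is not the PALM version check itself, only a generic illustration with hypothetical variable names of why the bump matters: a reader that compares the version string recorded in the file with the one it expects can reject restart files written with the old layout.

PROGRAM version_check_demo

   IMPLICIT NONE

   CHARACTER(LEN=10) :: version_expected = '5.3'  !< layout this reader understands
   CHARACTER(LEN=10) :: version_on_file  = '5.2'  !< pretend value read from an old restart file

   IF ( TRIM( version_on_file ) /= TRIM( version_expected ) )  THEN
      PRINT*, 'restart file version ', TRIM( version_on_file ),                                    &
              ' does not match expected version ', TRIM( version_expected )
   ELSE
      PRINT*, 'restart file version matches'
   ENDIF

END PROGRAM version_check_demo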