Changeset 4893 for palm/trunk/SOURCE/restart_data_mpi_io_mod.f90
- Timestamp: Mar 2, 2021 4:39:14 PM (3 years ago)
- File: palm/trunk/SOURCE/restart_data_mpi_io_mod.f90 (1 edited)
palm/trunk/SOURCE/restart_data_mpi_io_mod.f90
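The revision note in the diff below states that the output of surface data via MPI-IO was revised for better performance: surface values are gathered on the responsible I/O PEs and written as contiguous slices of one global 1-d record through a subarray filetype with a single collective call. The following standalone sketch only illustrates that write pattern; it is not PALM code, and the decomposition, the file name, and the variable names (nr_local, glo_start, ft_surf, array_position) are assumptions made for the example.

      PROGRAM surface_write_sketch
      !
      !-- Not PALM code: minimal sketch of a collective 1-d surface write with a subarray filetype.
         USE MPI

         IMPLICIT NONE

         INTEGER, PARAMETER :: wp = KIND( 1.0d0 )   !< working precision (assumption)

         INTEGER :: fh         !< MPI-IO file handle
         INTEGER :: ft_surf    !< subarray filetype describing the local slice
         INTEGER :: glo_start  !< global start index of the local slice (1-based)
         INTEGER :: ierr, myid, numprocs
         INTEGER :: nr_local   !< number of surface values owned by this PE (assumption)
         INTEGER :: nr_total   !< global number of surface values

         INTEGER, DIMENSION(1) :: dims1, lsize1, start1
         INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status

         INTEGER(KIND=MPI_OFFSET_KIND) :: array_position  !< byte offset of this record in the file

         REAL(wp), DIMENSION(:), ALLOCATABLE :: surf_data

         CALL MPI_INIT( ierr )
         CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
         CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )
      !
      !-- Hypothetical decomposition: equally sized, contiguous slices per PE.
         nr_local  = 1000
         nr_total  = nr_local * numprocs
         glo_start = myid * nr_local + 1
         ALLOCATE( surf_data(nr_local) )
         surf_data = REAL( myid, KIND = wp )
      !
      !-- 1-d subarray filetype: global size, local size, global start (0-based).
         dims1(1)  = nr_total
         lsize1(1) = nr_local
         start1(1) = glo_start - 1
         CALL MPI_TYPE_CREATE_SUBARRAY( 1, dims1, lsize1, start1, MPI_ORDER_FORTRAN,               &
                                        MPI_DOUBLE_PRECISION, ft_surf, ierr )
         CALL MPI_TYPE_COMMIT( ft_surf, ierr )
      !
      !-- Byte offset of this record; a hypothetical 4096-byte header precedes it. Products like
      !-- "number of values * value size" must be computed in 64-bit integers to avoid overflow
      !-- for large domains (compare the INTEGER(8) remark in the diff below).
         array_position = 4096_MPI_OFFSET_KIND

         CALL MPI_FILE_OPEN( MPI_COMM_WORLD, 'surface_sketch.bin',                                 &
                             MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, fh, ierr )
         CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_DOUBLE_PRECISION, ft_surf, 'native',      &
                                 MPI_INFO_NULL, ierr )
         CALL MPI_FILE_WRITE_ALL( fh, surf_data, nr_local, MPI_DOUBLE_PRECISION, status, ierr )
         CALL MPI_FILE_CLOSE( fh, ierr )

         CALL MPI_TYPE_FREE( ft_surf, ierr )
         CALL MPI_FINALIZE( ierr )

      END PROGRAM surface_write_sketch

Because every PE writes a disjoint, contiguous slice of the same global record, one MPI_FILE_WRITE_ALL replaces many small individual writes, which is typically where the performance gain of collective I/O comes from.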
r4857 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance 28 ! 29 ! 4857 2021-01-26 07:24:41Z raasch 27 30 ! bugfix: allocation of 3d-int4 array moved from particle output to standard output 28 31 ! … … 155 158 myidx, & 156 159 myidy, & 157 npex, &158 npey, &159 160 numprocs, & 160 161 pdims … … 183 184 INTEGER(iwp) :: fh = -1 !< MPI-IO file handle 184 185 #if defined( __parallel ) 185 INTEGER(iwp) :: fhs = -1 !< MPI-IO file handle to open file with comm2d always186 #endif187 186 INTEGER(iwp) :: ft_surf = -1 !< MPI filetype surface data 188 #if defined( __parallel )189 187 INTEGER(iwp) :: ft_2di_nb !< MPI filetype 2D array INTEGER no outer boundary 190 188 INTEGER(iwp) :: ft_2d !< MPI filetype 2D array REAL with outer boundaries … … 196 194 INTEGER(iwp) :: glo_start !< global start index on this PE 197 195 #if defined( __parallel ) 198 INTEGER(iwp) :: local_start !<199 #endif200 INTEGER(iwp) :: nr_iope !<201 INTEGER(iwp) :: nr_val !< local number of values in x and y direction202 #if defined( __parallel )203 196 INTEGER(iwp) :: win_2di !< 204 197 INTEGER(iwp) :: win_2dr !< … … 207 200 INTEGER(iwp) :: win_3dr !< 208 201 INTEGER(iwp) :: win_3ds !< 202 INTEGER(iwp) :: win_end = -1 !< 203 INTEGER(iwp) :: win_glost = -1 !< 204 INTEGER(iwp) :: win_out = -1 !< 205 INTEGER(iwp) :: win_start = -1 !< 209 206 INTEGER(iwp) :: win_surf = -1 !< 210 207 #endif … … 216 213 INTEGER(iwp), DIMENSION(:,:), POINTER, CONTIGUOUS :: array_2di !< 217 214 218 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_end_index !< 219 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_global_start !< 215 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_end_index !< extended end index, every grid cell has at least one value 216 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_start_index !< 217 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_end_index !< module copy of end_index 220 218 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_start_index !< 219 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: thread_index !< 220 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: target_thread !< 221 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: transfer_index !< 222 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: thread_values !< 223 ! 
224 !-- Indices for cyclic fill 225 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: o_start_index !< 226 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_start_index !< 227 !#if defined( __parallel ) 228 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: o_end_index !< extended end index, every grid cell has at least one value 229 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_end_index !< extended end index, every grid cell has at least one value 230 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_global_start !< 231 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: c_global_end !< 232 !#endif 221 233 222 234 INTEGER(isp), DIMENSION(:,:,:), POINTER, CONTIGUOUS :: array_3di4 !< 223 235 INTEGER(idp), DIMENSION(:,:,:), POINTER, CONTIGUOUS :: array_3di8 !< 224 236 225 LOGICAL :: all_pes_write !< all PEs have data to write226 237 LOGICAL :: filetypes_created !< 227 238 LOGICAL :: io_on_limited_cores_per_node !< switch to shared memory MPI-IO … … 229 240 LOGICAL :: wr_flag !< file is opened for write 230 241 242 #if defined( __parallel ) 243 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: local_indices 244 #endif 245 246 REAL(wp), DIMENSION(:), POINTER, CONTIGUOUS :: array_out !< 231 247 #if defined( __parallel ) 232 248 REAL(wp), DIMENSION(:), POINTER, CONTIGUOUS :: array_1d !< … … 251 267 INTEGER(iwp) :: nr_int !< number of INTEGER entries in header 252 268 INTEGER(iwp) :: nr_real !< number of REAL entries in header 269 INTEGER(iwp) :: pes_along_x !< number of PEs along x-direction during writing restart file 270 INTEGER(iwp) :: pes_along_y !< number of PEs along y-direction during writing restart file 253 271 INTEGER(iwp) :: total_nx !< total number of points in x-direction 254 272 INTEGER(iwp) :: total_ny !< total number of points in y-direction 255 273 END TYPE general_header 256 274 257 TYPE(general_header), TARGET :: tgh !<275 TYPE(general_header), TARGET, PUBLIC :: tgh !< 258 276 259 277 TYPE(sm_class) :: sm_io !< … … 421 439 wrd_mpi_io_surface 422 440 423 424 441 CONTAINS 425 442 … … 452 469 TYPE(C_PTR) :: buf_ptr !< 453 470 #endif 471 454 472 455 473 offset = 0 … … 467 485 io_file_name = file_name 468 486 ! 469 !-- Setup for IO on a limited number of threads per node (using shared memory MPI)487 !-- Setup for IO on a limited number of PEs per node (using shared memory MPI) 470 488 IF ( rd_flag ) THEN 471 489 set_filetype = .TRUE. … … 822 840 823 841 ! 824 !-- TODO: describe in more detail what is done here and why it is done825 !-- save grid of main run842 !-- Save grid information of the mainrun, i.e. grid variables like nxl, nxr, nys, nyn and other 843 !-- values are stored within the mainrun_grid structure 826 844 CALL mainrun_grid%save_grid_into_this_class() 827 845 … … 834 852 rma_offset_s = 0 835 853 ! 836 !-- Determine, if gridpoints of the prerun are located on this thread.854 !-- Determine, if gridpoints of the prerun are located on this PE. 837 855 !-- Set the (cyclic) prerun grid. 838 856 nxr = MIN( nxr, nx_on_file ) … … 857 875 ny = ny_on_file 858 876 ! 859 !-- Determine, if this threadis doing IO877 !-- Determine, if this PE is doing IO 860 878 IF ( nnx > 0 .AND. nny > 0 ) THEN 861 879 color = 1 … … 892 910 #endif 893 911 ! 894 !-- Allocate 2d buffers as RMA window, accessible on all threads912 !-- Allocate 2d buffers as RMA window, accessible on all PEs 895 913 IF ( pe_active_for_read ) THEN 896 914 ALLOCATE( rmabuf_2di(nys:nyn,nxl:nxr) ) … … 918 936 919 937 ! 
920 !-- Allocate 3d buffer as RMA window, accessable on all threads938 !-- Allocate 3d buffer as RMA window, accessable on all PEs 921 939 IF ( pe_active_for_read ) THEN 922 940 ALLOCATE( rmabuf_3d(nzb:nzt+1,nys:nyn,nxl:nxr) ) … … 932 950 933 951 ! 934 !-- TODO: comment in more detail, what is done here, and why 935 !-- save small grid 952 !-- Save grid of the prerun, i.e. grid variables like nxl, nxr, nys, nyn and other values 953 !-- are stored within the prerun_grid structure. 954 !-- The prerun grid can later be activated by calling prerun_grid%activate_grid_from_this_class() 936 955 CALL prerun_grid%save_grid_into_this_class() 937 956 prerun_grid%comm2d = comm_cyclic_fill … … 1152 1171 1153 1172 1154 !kk write(9,*) 'Here is rma_cylic_fill_real_2d ',nxl,nxr,nys,nyn; FLUSH(9)1155 1156 1173 ! 1157 1174 !-- Reading 2d real array on prerun grid … … 1297 1314 !-- array would be dimensioned in the caller subroutine like this: 1298 1315 !-- INTEGER, DIMENSION(nysg:nyng,nxlg:nxrg):: data 1299 message_string = '2d-INTEGER array "' // TRIM( name ) // '" to be read from restart ' //&1300 'f ile is defined with illegal dimensions in the PALM code'1316 message_string = '2d-INTEGER array with nbgp "' // TRIM( name ) // '" to be read ' // & 1317 'from restart file is defined with illegal dimensions in the PALM code' 1301 1318 CALL message( 'rrd_mpi_io_int_2d', 'PA0723', 3, 2, 0, 6, 0 ) 1302 1319 … … 1374 1391 1375 1392 1376 CALL prerun_grid%activate_grid_from_this_class()1377 1378 1393 IF ( pe_active_for_read ) THEN 1394 CALL prerun_grid%activate_grid_from_this_class() 1395 1379 1396 #if defined( __parallel ) 1380 1397 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_INTEGER, ft_2di_nb, 'native', & 1381 1398 MPI_INFO_NULL, ierr ) 1382 1399 CALL MPI_FILE_READ_ALL( fh, array_2di, SIZE( array_2di ), MPI_INTEGER, status, ierr ) 1400 #else 1401 CALL posix_lseek( fh, array_position ) 1402 CALL posix_read( fh, array_2di, SIZE( array_2di ) ) 1383 1403 #endif 1384 1404 DO i = nxl, nxr … … 1386 1406 ENDDO 1387 1407 data(1:nny,1:nnx) = rmabuf_2di 1388 ENDIF 1389 1390 CALL mainrun_grid%activate_grid_from_this_class()1408 1409 CALL mainrun_grid%activate_grid_from_this_class() 1410 ENDIF 1391 1411 1392 1412 #if defined( __parallel ) … … 1396 1416 #endif 1397 1417 1398 IF ( .NOT. pe_active_for_read ) THEN 1399 1400 is = nxl 1401 ie = nxr 1402 js = nys 1403 je = nyn 1404 1405 ELSE 1406 1407 is = nxl 1408 ie = nxr 1409 js = prerun_grid%nys+1 1410 je = nyn 1411 DO i = is, ie 1412 DO j = js, je 1413 i_remote = MOD(i,nx_on_file+1) 1414 j_remote = MOD(j,ny_on_file+1) 1415 rem_pe = remote_pe(i_remote,j_remote) 1416 rem_offs = rma_offset(i_remote,j_remote) 1417 nval = 1 1418 1419 #if defined( __parallel ) 1420 IF ( rem_pe /= myid ) THEN 1421 CALL MPI_GET( data(j-nys+1,i-nxl+1), nval, MPI_INTEGER, rem_pe, rem_offs, nval, & 1422 MPI_INTEGER, rmawin_2di, ierr ) 1423 ELSE 1424 data(j-nys+1,i-nxl+1) = rmabuf_2di(j_remote,i_remote) 1425 ENDIF 1426 #else 1427 data(j-nys+1,i-nxl+1) = array_2di(i_remote,j_remote) 1428 #endif 1429 ENDDO 1430 ENDDO 1431 is = prerun_grid%nxr+1 1432 ie = nxr 1433 js = nys 1434 je = nyn 1435 1436 ENDIF 1418 is = nxl 1419 ie = nxr 1420 js = nys 1421 je = nyn 1437 1422 1438 1423 DO i = is, ie … … 1717 1702 ierr ) 1718 1703 CALL MPI_FILE_READ_ALL( fh, array_3d, SIZE( array_3d ), MPI_REAL, status, ierr ) 1704 #else 1705 CALL posix_lseek( fh, array_position ) 1706 CALL posix_read( fh, array_3d, SIZE( array_3d ) ) 1719 1707 #endif 1720 1708 DO i = nxl, nxr … … 1727 1715 #if defined( __parallel ) 1728 1716 ! 
1729 !-- Close RMA window to allow remote access 1730 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 1731 #endif 1732 1733 IF ( .NOT. pe_active_for_read ) THEN 1734 1735 is = nxl 1736 ie = nxr 1737 js = nys 1738 je = nyn 1739 1740 ELSE 1741 1742 is = nxl 1743 ie = nxr 1744 js = prerun_grid%nys+1 1745 je = nyn 1746 1747 DO i = is, ie 1748 DO j = js, je 1749 i_remote = MOD(i,nx_on_file+1) 1750 j_remote = MOD(j,ny_on_file+1) 1751 rem_pe = remote_pe(i_remote,j_remote) 1752 rem_offs = rma_offset(i_remote,j_remote)*(nzt-nzb+2) 1753 nval = nzt-nzb+2 1754 1755 #if defined( __parallel ) 1756 IF(rem_pe /= myid) THEN 1757 CALL MPI_GET( data(nzb,j,i), nval, MPI_REAL, rem_pe, rem_offs, nval, MPI_REAL, & 1758 rmawin_3d, ierr) 1759 ELSE 1760 data(:,j,i) = rmabuf_3d(:,j_remote,i_remote) 1761 ENDIF 1762 #else 1763 data(:,j,i) = array_3d(:,i_remote,j_remote) 1764 #endif 1765 ENDDO 1766 ENDDO 1767 is = prerun_grid%nxr+1 1768 ie = nxr 1769 js = nys 1770 je = nyn 1771 1772 ENDIF 1717 !-- Close RMA window to allow remote access 1718 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 1719 #endif 1720 1721 is = nxl 1722 ie = nxr 1723 js = nys 1724 je = nyn 1773 1725 1774 1726 DO i = is, ie 1775 1727 DO j = js, je 1776 i_remote = MOD( i,nx_on_file+1)1777 j_remote = MOD( j,ny_on_file+1)1728 i_remote = MOD( i, nx_on_file+1 ) 1729 j_remote = MOD( j, ny_on_file+1 ) 1778 1730 rem_pe = remote_pe(i_remote,j_remote) 1779 1731 rem_offs = rma_offset(i_remote,j_remote) * ( nzt-nzb+2 ) … … 1850 1802 1851 1803 IF ( found ) THEN 1852 #if defined( __parallel )1853 1804 CALL rd_mpi_io_create_filetypes_3dsoil( nzb_soil, nzt_soil ) 1805 #if defined( __parallel ) 1854 1806 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 1855 1807 IF ( sm_io%iam_io_pe ) THEN … … 1874 1826 ENDIF 1875 1827 1828 #if defined( __parallel ) 1829 IF ( sm_io%is_sm_active() ) THEN 1830 CALL MPI_WIN_FREE( win_3ds, ierr ) 1831 ELSE 1832 DEALLOCATE( array_3d_soil ) 1833 ENDIF 1834 #else 1835 DEALLOCATE( array_3d_soil ) 1836 #endif 1837 1876 1838 ELSE 1877 1839 … … 2042 2004 2043 2005 IF ( header_array_index == max_nr_arrays ) THEN 2044 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2006 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2007 CALL message( 'wrd_mpi_io_real_2d', 'PA0585', 1, 2, 0, 6, 0 ) 2045 2008 ENDIF 2046 2009 … … 2107 2070 2108 2071 IF ( header_array_index == max_nr_arrays ) THEN 2109 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2072 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2073 CALL message( 'wrd_mpi_io_int_2d', 'PA0585', 1, 2, 0, 6, 0 ) 2110 2074 ENDIF 2111 2075 … … 2182 2146 2183 2147 IF ( header_array_index == max_nr_arrays ) THEN 2184 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2148 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2149 CALL message( 'wrd_mpi_io_int4_3d', 'PA0585', 1, 2, 0, 6, 0 ) 2185 2150 ENDIF 2186 2151 … … 2225 2190 INT( (iog%nx+1), KIND = rd_offset_kind ) * isp 2226 2191 2227 write(9,*) 'array_position int4_3d ',trim(name),' ',array_position2228 2229 2192 END SUBROUTINE wrd_mpi_io_int4_3d 2230 2193 … … 2250 2213 2251 2214 IF ( header_array_index == max_nr_arrays ) THEN 2252 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2215 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2216 CALL 
message( 'wrd_mpi_io_int8_3d', 'PA0585', 1, 2, 0, 6, 0 ) 2253 2217 ENDIF 2254 2218 … … 2293 2257 INT( (iog%nx+1), KIND = rd_offset_kind ) * dp 2294 2258 2295 write(9,*) 'array_position int8_3d ',trim(name),' ',array_position2296 2297 2259 END SUBROUTINE wrd_mpi_io_int8_3d 2298 2260 … … 2318 2280 2319 2281 IF ( header_array_index == max_nr_arrays ) THEN 2320 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2282 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2283 CALL message( 'wrd_mpi_io_real_3d', 'PA0585', 1, 2, 0, 6, 0 ) 2321 2284 ENDIF 2322 2285 … … 2395 2358 2396 2359 IF ( header_array_index == max_nr_arrays ) THEN 2397 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2360 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2361 CALL message( 'wrd_mpi_io_real_3d_soil', 'PA0585', 1, 2, 0, 6, 0 ) 2398 2362 ENDIF 2399 2363 … … 2402 2366 header_array_index = header_array_index + 1 2403 2367 2404 #if defined( __parallel )2405 2368 CALL rd_mpi_io_create_filetypes_3dsoil( nzb_soil, nzt_soil ) 2406 #endif2407 2369 2408 2370 IF ( include_total_domain_boundaries) THEN … … 2432 2394 ENDIF 2433 2395 CALL sm_io%sm_node_barrier() 2396 2397 IF ( sm_io%is_sm_active() ) THEN 2398 CALL MPI_WIN_FREE( win_3ds, ierr ) 2399 ELSE 2400 DEALLOCATE( array_3d_soil ) 2401 ENDIF 2402 IF ( sm_io%iam_io_pe ) THEN 2403 CALL MPI_TYPE_FREE( ft_3dsoil, ierr ) 2404 ENDIF 2434 2405 #else 2435 2406 CALL posix_lseek( fh, array_position ) 2436 2407 CALL posix_write( fh, array_3d_soil, SIZE( array_3d_soil ) ) 2408 DEALLOCATE( array_3d_soil ) 2437 2409 #endif 2438 2410 ! … … 2589 2561 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, comm2d, ierr ) 2590 2562 ELSE 2591 IF 2563 IF( sm_io%iam_io_pe ) THEN 2592 2564 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2565 ENDIF 2566 IF ( myid == 0 ) THEN 2593 2567 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2594 CALL MPI_FILE_READ _ALL( fh, data, SIZE( data ), MPI_REAL, status, ierr )2568 CALL MPI_FILE_READ( fh, data, SIZE( data ), MPI_REAL, status, ierr ) 2595 2569 ENDIF 2596 IF ( sm_io%is_sm_active() ) THEN 2597 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, sm_io%comm_shared, ierr ) 2598 ENDIF 2570 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, comm2d, ierr ) 2599 2571 ENDIF 2600 2572 #else … … 2749 2721 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 2750 2722 ENDIF 2751 CALL MPI_BCAST( data, SIZE( data ), MPI_ REAL, 0, comm2d, ierr )2723 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, comm2d, ierr ) 2752 2724 ELSE 2753 IF 2725 IF( sm_io%iam_io_pe ) THEN 2754 2726 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2727 ENDIF 2728 IF ( myid == 0 ) THEN 2755 2729 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2756 CALL MPI_FILE_READ _ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr )2730 CALL MPI_FILE_READ( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 2757 2731 ENDIF 2758 IF ( sm_io%is_sm_active() ) THEN 2759 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 2760 ENDIF 2761 ENDIF 2732 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, comm2d, ierr ) 2733 ENDIF 2762 2734 #else 2763 2735 CALL posix_lseek( fh, array_position ) … … 2800 2772 2801 2773 IF ( header_array_index == max_nr_arrays ) THEN 2802 STOP '+++ maximum number of 2d/3d-array entries in restart file 
header exceeded' 2774 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2775 CALL message( 'wrd_mpi_io_global_array_real_1d', 'PA0585', 1, 2, 0, 6, 0 ) 2803 2776 ENDIF 2804 2777 … … 2939 2912 2940 2913 IF ( header_array_index == max_nr_arrays ) THEN 2941 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 2914 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 2915 CALL message( 'wrd_mpi_io_global_array_int_1d', 'PA0585', 1, 2, 0, 6, 0 ) 2942 2916 ENDIF 2943 2917 … … 3030 3004 ENDDO 3031 3005 3032 write(9,*) 'particle_size_read ',particle_size,array_size,array_position,sum(prt_global_index)3033 3034 3006 ALLOCATE( prt_data(MAX(array_size,1)) ) 3035 3007 … … 3078 3050 array_position = prt_nr_bytes 3079 3051 3080 write(9,*) 'array_position after particle read ',array_position,prt_nr_bytes,rs3081 3082 3052 DEALLOCATE( prt_data ) 3083 3053 … … 3092 3062 ! ------------ 3093 3063 !> Read 1d-REAL surface data array with MPI-IO. 3094 !--------------------------------------------------------------------------------------------------! 3095 SUBROUTINE rrd_mpi_io_surface( name, data, first_index ) 3064 !> This is a recursive subroutine. In case of cyclic fill mode it may call itself for reading parts 3065 !> of the prerun grid. 3066 !--------------------------------------------------------------------------------------------------! 3067 RECURSIVE SUBROUTINE rrd_mpi_io_surface( name, data, first_index ) 3096 3068 3097 3069 IMPLICIT NONE … … 3099 3071 CHARACTER(LEN=*), INTENT(IN) :: name !< 3100 3072 3073 INTEGER(iwp), OPTIONAL :: first_index !< 3074 INTEGER(iwp) :: i !< 3075 INTEGER(iwp) :: j !< 3076 INTEGER(iwp) :: lo_first_index !< 3077 3078 #if defined( __parallel ) 3079 INTEGER(iwp) :: buf_start !< 3101 3080 INTEGER(KIND=rd_offset_kind) :: disp !< displacement of actual indices 3102 INTEGER(KIND=rd_offset_kind) :: disp_f !< displacement in file 3103 INTEGER(KIND=rd_offset_kind) :: disp_n !< displacement of next column 3104 INTEGER(iwp), OPTIONAL :: first_index !< 3105 3106 INTEGER(iwp) :: i !< 3107 INTEGER(iwp) :: i_f !< 3108 INTEGER(iwp) :: j !< 3109 INTEGER(iwp) :: j_f !< 3110 INTEGER(iwp) :: lo_first_index !< 3111 INTEGER(iwp) :: nr_bytes !< 3112 INTEGER(iwp) :: nr_bytes_f !< 3113 INTEGER(iwp) :: nr_words !< 3114 #if defined( __parallel ) 3115 INTEGER, DIMENSION(rd_status_size) :: status !< 3116 #else 3117 TYPE(C_PTR) :: buf !< 3118 #endif 3119 3120 LOGICAL :: found !< 3081 INTEGER(iwp) :: ie !< 3082 INTEGER(iwp) :: ind_gb !< 3083 INTEGER(iwp) :: ind_out !< 3084 INTEGER(iwp) :: is !< 3085 INTEGER(iwp) :: n !< 3086 INTEGER(iwp) :: n_trans !< 3087 3088 INTEGER(iwp),DIMENSION(0:numprocs-1) :: lo_index !< 3089 INTEGER, DIMENSION(rd_status_size) :: status !< 3090 #endif 3091 LOGICAL :: found !< 3121 3092 3122 3093 REAL(wp), INTENT(OUT), DIMENSION(:), TARGET :: data !< 3094 #if defined( __parallel ) 3095 REAL(wp),DIMENSION(:),ALLOCATABLE :: put_buffer !< 3096 #endif 3123 3097 3124 3098 … … 3132 3106 DO i = 1, tgh%nr_arrays 3133 3107 IF ( TRIM( array_names(i) ) == TRIM( name ) ) THEN 3108 ! 3109 !-- ATTENTION: The total_number_of_surface_values and wp MUST be INTERGER(8). 3110 !-- The compiler (at least Intel) first computes total_number_of_surface_values*wp 3111 !-- and then does the conversion to INTEGER(8). 
3112 !-- This may lead to wrong results when total_number_of_surface_values*wp is > 2*10**6 3134 3113 array_position = array_offset(i) + ( lo_first_index - 1 ) * & 3135 total_number_of_surface_values * wp3114 INT( total_number_of_surface_values, idp ) * INT( wp, idp ) 3136 3115 found = .TRUE. 3137 3116 EXIT … … 3139 3118 ENDDO 3140 3119 3141 disp = -1 3142 disp_f = -1 3143 disp_n = -1 3120 ! 3121 !-- In case of 2d-data, name is written only once 3122 IF ( lo_first_index == 1 ) THEN 3123 3124 IF ( header_array_index == max_nr_arrays ) THEN 3125 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 3126 CALL message( 'rrd_mpi_io_surface', 'PA0585', 1, 2, 0, 6, 0 ) 3127 ENDIF 3128 3129 array_names(header_array_index) = name 3130 array_offset(header_array_index) = array_position 3131 header_array_index = header_array_index + 1 3132 3133 ENDIF 3134 3144 3135 IF ( found ) THEN 3145 3146 3136 IF ( cyclic_fill_mode ) THEN 3147 3137 3148 3138 CALL rrd_mpi_io_surface_cyclic_fill 3139 RETURN 3149 3140 3150 3141 ELSE 3151 3152 IF ( MAXVAL( m_global_start ) == -1 ) RETURN ! Nothing to do on this PE 3142 #if defined( __parallel ) 3143 ! 3144 !-- Read data from restart file 3145 CALL sm_io%sm_node_barrier() ! has no effect if I/O on limited number of cores is inactive 3146 IF ( sm_io%iam_io_pe ) THEN 3147 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_surf, 'native', & 3148 MPI_INFO_NULL, ierr ) 3149 CALL MPI_FILE_READ_ALL ( fh, array_out, SIZE(array_out), MPI_REAL, status, ierr ) 3150 ENDIF 3151 CALL sm_io%sm_node_barrier() 3152 3153 ! 3154 !-- Copy data into transfer buffer. Data is organized in a way that only one MPI_PUT to the 3155 !-- respective PE ist required. 3156 ALLOCATE( put_buffer(SUM( transfer_index(4,:) )) ) 3157 3158 ind_gb = 1 3159 DO i = 1, SIZE( local_indices, 2 ) 3160 ind_out = local_indices(1,i) 3161 DO j = 1, local_indices(2,i) 3162 put_buffer(ind_gb) = array_out(ind_out) 3163 ind_out = ind_out + 1 3164 ind_gb = ind_gb + 1 3165 ENDDO 3166 ENDDO 3167 ! 3168 !-- Transfer data from I/O PEs to the respective PEs to which they belong. 3169 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3170 3171 buf_start = 1 3172 DO n = 0, numprocs-1 3173 n_trans = transfer_index(4,n) 3174 IF ( n_trans > 0 ) THEN 3175 disp = transfer_index(3,n) - 1 3176 CALL MPI_PUT( put_buffer(buf_start), n_trans, MPI_REAL, n, disp, n_trans, MPI_REAL,& 3177 win_surf, ierr) 3178 buf_start = buf_start + n_trans 3179 ENDIF 3180 ENDDO 3181 3182 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3183 DEALLOCATE( put_buffer ) 3184 ! 3185 !-- Copy from RMA window into output array (data) to allow transfering data to target PEs. 3186 !-- Check, if the number of surface values per grid cell match the index setup. 3187 lo_index = thread_values 3153 3188 DO i = nxl, nxr 3154 3189 DO j = nys, nyn 3155 3156 IF ( m_global_start(j,i) > 0 ) THEN 3157 disp = array_position+(m_global_start(j,i)-1) * wp 3158 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 3159 nr_bytes = nr_words * wp 3190 is = lo_index(target_thread(j,i)) + 1 3191 ie = is + m_end_index(j,i) - m_start_index(j,i) 3192 data(m_start_index(j,i):m_end_index(j,i)) = array_1d(is:ie) 3193 lo_index(target_thread(j,i)) = lo_index(target_thread(j,i)) + & 3194 e_end_index(j,i) - e_start_index(j,i) + 1 3195 ! 3196 !-- TODO: Test can be removed later. 
3197 IF ( e_end_index(j,i)-e_start_index(j,i)+1 /= NINT( array_1d(is-1) ) ) THEN 3198 WRITE( 9, '(A,6I8)' ) 'Nr surface values does not match ', j, i, & 3199 e_start_index(j,i), e_end_index(j,i), & 3200 e_end_index(j,i)-e_start_index(j,i)+1 , & 3201 NINT( array_1d(is-1) ) 3202 FLUSH( 9 ) 3203 CALL MPI_ABORT( comm2d, 1, ierr ) 3160 3204 ENDIF 3161 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry3162 disp_f = disp3163 nr_bytes_f = 03164 i_f = i3165 j_f = j3166 ENDIF3167 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry3168 disp_n = -13169 IF ( nr_bytes > 0 ) THEN3170 nr_bytes_f = nr_bytes_f+nr_bytes3171 ENDIF3172 ELSEIF ( j == nyn ) THEN ! Next x3173 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN3174 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp3175 ELSE3176 CYCLE3177 ENDIF3178 ELSE3179 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN3180 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp3181 ELSE3182 CYCLE3183 ENDIF3184 ENDIF3185 3186 3187 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block3188 nr_bytes_f = nr_bytes_f + nr_bytes3189 ELSE ! Read3190 #if defined( __parallel )3191 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr )3192 nr_words = nr_bytes_f / wp3193 CALL MPI_FILE_READ( fhs, data(m_start_index(j_f,i_f)), nr_words, MPI_REAL, status, &3194 ierr )3195 #else3196 !3197 !-- Use C_PTR here, because posix read does not work with indexed array3198 buf = C_LOC( data(m_start_index(j_f,i_f)) )3199 CALL posix_lseek( fh, disp_f )3200 CALL posix_read( fh, buf, nr_bytes_f )3201 #endif3202 disp_f = disp3203 nr_bytes_f = nr_bytes3204 i_f = i3205 j_f = j3206 ENDIF3207 3208 3205 ENDDO 3209 3206 ENDDO 3210 ENDIF 3211 3212 3213 ELSE 3214 3215 message_string = 'surface array "' // TRIM( name ) // '" not found in restart file' 3216 CALL message( 'rrd_mpi_io_surface', 'PA0722', 3, 2, 0, 6, 0 ) 3207 3208 3209 #else 3210 CALL posix_lseek( fh, array_position ) 3211 CALL posix_read( fh, array_out, SIZE(array_out) ) 3212 3213 DO i = nxl, nxr 3214 DO j = nys, nyn 3215 data(m_start_index(j,i):m_end_index(j,i)) = & 3216 array_out(e_start_index(j,i)+1:e_end_index(j,i)) 3217 ! 3218 !-- TODO: Test can be removed later. 3219 IF ( e_end_index(j,i)-e_start_index(j,i)+1 /= NINT(array_out(e_start_index(j,i))) )& 3220 THEN 3221 WRITE( 9, '(A,6I8)' ) 'Nr surface values does not match ', j, i, & 3222 e_start_index(j,i), e_end_index(j,i), & 3223 e_end_index(j,i)-e_start_index(j,i)+1, & 3224 NINT( array_out(e_start_index(j,i)) ) 3225 FLUSH( 9 ) 3226 CALL ABORT() 3227 ENDIF 3228 ENDDO 3229 ENDDO 3230 #endif 3231 ENDIF 3217 3232 3218 3233 ENDIF … … 3226 3241 INTEGER(iwp) :: i !< 3227 3242 INTEGER(iwp) :: ie !< 3228 #if defined( __parallel )3229 INTEGER(iwp) :: ierr !<3230 #endif3231 3243 INTEGER(iwp) :: is !< 3232 3244 INTEGER(iwp) :: i_remote !< … … 3241 3253 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_offs !< 3242 3254 #else 3243 INTEGER(idp) :: rem_offs 3244 #endif 3245 3246 LOGICAL :: write_done !< 3247 3248 3249 ! 3250 !-- In the current version, there is only 1 value per grid cell allowed. 3251 !-- In this special case, the cyclical repetition can be done with the same method as for 2d-real 3252 !-- array. 3255 INTEGER(idp) :: rem_offs !< 3256 #endif 3257 3258 REAL(wp), DIMENSION(:), ALLOCATABLE :: c_data !< 3259 3260 3261 ! 3262 !-- ATTENTION: This version allows only 1 surface element per grid cell. 3263 ! 3264 !-- Activate grid of the smaller prerun, i.e. grid variables like nxl, nxr, nys, nyn and other 3265 !-- values are set according to the prerun settings. 
3253 3266 CALL prerun_grid%activate_grid_from_this_class() 3254 3267 3255 3268 IF ( pe_active_for_read ) THEN 3256 rmabuf_2d = -1.0 3269 3270 IF ( MAXVAL( m_end_index ) <= 0 ) THEN 3271 CALL mainrun_grid%activate_grid_from_this_class() 3272 IF ( debug_output ) THEN 3273 CALL debug_message( 'PE inactive for reading restart or prerun data', 'start' ) 3274 ENDIF 3275 RETURN 3276 ENDIF 3277 3278 ALLOCATE( c_data(MAXVAL( m_end_index )) ) 3279 3280 ! 3281 !-- Recursive CALL of rrd_mpi_io_surface. 3282 !-- rrd_mpi_io_surface is called with cyclic_fill_mode = .FALSE. on the smaller prerun grid. 3283 cyclic_fill_mode = .FALSE. 3284 CALL rrd_mpi_io_surface( name, c_data ) 3285 cyclic_fill_mode = .TRUE. 3286 3257 3287 DO i = nxl, nxr 3258 3288 DO j = nys, nyn 3259 3260 IF ( m_global_start(j,i) > 0 ) THEN 3261 disp = array_position+(m_global_start(j,i)-1) * wp 3262 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 3263 nr_bytes = nr_words * wp 3264 ENDIF 3265 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry 3266 disp_f = disp 3267 nr_bytes_f = 0 3268 write_done = .TRUE. 3269 ENDIF 3270 IF( write_done ) THEN 3271 i_f = i 3272 j_f = j 3273 write_done = .FALSE. 3274 ENDIF 3275 3276 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry 3277 disp_n = -1 3278 IF ( nr_bytes > 0 ) THEN 3279 nr_bytes_f = nr_bytes_f+nr_bytes 3280 ENDIF 3281 ELSEIF ( j == nyn ) THEN ! Next x 3282 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN 3283 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp 3284 ELSE 3285 CYCLE 3286 ENDIF 3287 ELSE 3288 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN 3289 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp 3290 ELSE 3291 CYCLE 3292 ENDIF 3293 ENDIF 3294 3295 3296 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block 3297 nr_bytes_f = nr_bytes_f + nr_bytes 3298 ELSE ! Read 3299 #if defined( __parallel ) 3300 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr ) 3301 nr_words = nr_bytes_f / wp 3302 CALL MPI_FILE_READ( fhs, rmabuf_2d(j_f,i_f), nr_words, MPI_REAL, status, ierr ) 3303 #else 3304 CALL posix_lseek( fh, disp_f ) 3305 CALL posix_read( fh, rmabuf_2d(j_f:,i_f:), nr_bytes_f ) 3306 #endif 3307 3308 disp_f = disp 3309 nr_bytes_f = nr_bytes 3310 write_done = .TRUE. 3311 ENDIF 3312 3289 rmabuf_2d(j,i) = c_data(c_start_index(j,i)) 3313 3290 ENDDO 3314 3291 ENDDO 3315 3292 3316 3293 ENDIF 3317 3294 ! 3295 !-- Activate grid of the mainrun, i.e. grid variables like nxl, nxr, nys, nyn and other values 3296 !-- are set according to the mainrun settings. 3318 3297 CALL mainrun_grid%activate_grid_from_this_class() 3319 3298 … … 3324 3303 #endif 3325 3304 3326 IF ( .NOT. pe_active_for_read ) THEN 3327 3328 is = nxl 3329 ie = nxr 3330 js = nys 3331 je = nyn 3332 3333 ELSE 3334 3335 is = nxl 3336 ie = nxr 3337 js = prerun_grid%nys+1 3338 je = nyn 3339 3340 DO i = is, ie 3341 DO j = js, je 3342 i_remote = MOD(i,nx_on_file+1) 3343 j_remote = MOD(j,ny_on_file+1) 3344 rem_pe = remote_pe(i_remote,j_remote) 3345 rem_offs = rma_offset(i_remote,j_remote) 3346 nval = 1 3347 3348 #if defined( __parallel ) 3349 IF ( rem_pe /= myid ) THEN 3350 CALL MPI_GET( data(m_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, & 3351 MPI_REAL, rmawin_2d, ierr) 3352 ELSE 3353 data(m_start_index(j,i)) = rmabuf_2d(j_remote,i_remote) 3354 ENDIF 3355 #else 3356 data(m_start_index(j,i)) = array_2d(i_remote,j_remote) 3357 #endif 3358 ENDDO 3359 ENDDO 3360 is = prerun_grid%nxr+1 3361 ie = nxr 3362 js = nys 3363 je = nyn 3364 3365 ENDIF 3305 ! 
3306 !-- After reading surface data on the small grid, map these data in a cyclic way to all respective 3307 !-- grid points of the main run. 3308 is = nxl 3309 ie = nxr 3310 js = nys 3311 je = nyn 3366 3312 3367 3313 DO i = is, ie 3368 3314 DO j = js, je 3369 i_remote = MOD( i,nx_on_file+1)3370 j_remote = MOD( j,ny_on_file+1)3315 i_remote = MOD( i, nx_on_file+1 ) 3316 j_remote = MOD( j, ny_on_file+1 ) 3371 3317 rem_pe = remote_pe(i_remote,j_remote) 3372 3318 rem_offs = rma_offset(i_remote,j_remote) … … 3375 3321 #if defined( __parallel ) 3376 3322 IF ( rem_pe /= myid ) THEN 3377 CALL MPI_GET( data( m_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, &3323 CALL MPI_GET( data(o_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, & 3378 3324 MPI_REAL, rmawin_2d, ierr) 3379 3325 ELSE 3380 data( m_start_index(j,i)) = rmabuf_2d(j_remote,i_remote)3326 data(o_start_index(j,i)) = rmabuf_2d(j_remote,i_remote) 3381 3327 ENDIF 3382 3328 #else 3383 data( m_start_index(j,i)) = array_2d(i_remote,j_remote)3329 data(o_start_index(j,i)) = array_2d(i_remote,j_remote) 3384 3330 #endif 3385 3331 ENDDO … … 3391 3337 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 3392 3338 #endif 3339 3340 IF ( ALLOCATED( c_data ) ) DEALLOCATE( c_data ) 3393 3341 3394 3342 END SUBROUTINE rrd_mpi_io_surface_cyclic_fill … … 3539 3487 array_position = prt_nr_bytes 3540 3488 3541 write(9,*) 'array_position after particle ',array_position,prt_nr_bytes,rs3542 3543 3489 DEALLOCATE( prt_data ) 3544 3490 … … 3556 3502 IMPLICIT NONE 3557 3503 3558 CHARACTER(LEN=*), INTENT(IN) :: name !< 3559 3560 #if defined( __parallel ) 3561 INTEGER(KIND=rd_offset_kind) :: disp !< 3562 #endif 3563 INTEGER(iwp), OPTIONAL :: first_index !< 3564 #if defined( __parallel ) 3565 INTEGER(iwp) :: i !< 3566 #endif 3567 INTEGER(iwp) :: lo_first_index !< 3568 INTEGER(KIND=rd_offset_kind) :: offset !< 3569 3570 #if defined( __parallel ) 3571 INTEGER, DIMENSION(rd_status_size) :: status !< 3572 #endif 3573 3574 REAL(wp), INTENT(IN), DIMENSION(:), TARGET :: data !< 3575 3576 3577 offset = 0 3504 CHARACTER(LEN=*), INTENT(IN) :: name !< 3505 3506 INTEGER(iwp), OPTIONAL :: first_index !< 3507 INTEGER(iwp) :: i !< 3508 INTEGER(iwp) :: j !< 3509 INTEGER(iwp) :: lo_first_index !< 3510 #if defined( __parallel ) 3511 INTEGER(iwp) :: buf_start !< 3512 INTEGER(iwp) :: ie !< 3513 INTEGER(iwp) :: is !< 3514 INTEGER(iwp) :: ind_gb !< 3515 INTEGER(iwp) :: ind_out !< 3516 INTEGER(iwp) :: n !< 3517 INTEGER(iwp) :: n_trans !< 3518 #endif 3519 3520 #if defined( __parallel ) 3521 INTEGER(KIND=MPI_ADDRESS_KIND) :: disp !< displacement in RMA window 3522 INTEGER(KIND=rd_offset_kind) :: offset !< 3523 3524 INTEGER(iwp), DIMENSION(0:numprocs-1) :: lo_index !< 3525 INTEGER(iwp), DIMENSION(rd_status_size) :: status !< 3526 #endif 3527 3528 REAL(wp), INTENT(IN), DIMENSION(:), TARGET :: data !< 3529 #if defined( __parallel ) 3530 REAL(wp), DIMENSION(:), ALLOCATABLE :: get_buffer !< 3531 #endif 3532 3533 3578 3534 lo_first_index = 1 3579 3535 … … 3581 3537 lo_first_index = first_index 3582 3538 ENDIF 3539 3583 3540 ! 
3584 3541 !-- In case of 2d-data, name is written only once … … 3586 3543 3587 3544 IF ( header_array_index == max_nr_arrays ) THEN 3588 STOP '+++ maximum number of 2d/3d-array entries in restart file header exceeded' 3545 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded' 3546 CALL message( 'wrd_mpi_io_surface', 'PA0585', 1, 2, 0, 6, 0 ) 3589 3547 ENDIF 3590 3548 … … 3596 3554 3597 3555 #if defined( __parallel ) 3598 IF ( sm_io%is_sm_active() ) THEN 3599 DO i = 1, nr_val 3600 array_1d(i+local_start) = data(i) 3556 offset = 0 3557 3558 ALLOCATE( get_buffer(SUM( transfer_index(4,:) )) ) 3559 ! 3560 !-- Copy from input array (data) to RMA window to allow the target PEs to get the appropiate data. 3561 !-- At this point, a dummy surface element is added. This makes sure that every x-y grid cell owns 3562 !-- at least one surface element. This way, bookkeeping becomes much easier. 3563 lo_index = thread_values 3564 DO i = nxl, nxr 3565 DO j = nys, nyn 3566 is = lo_index(target_thread(j,i)) + 1 3567 ie = is + m_end_index(j,i) - m_start_index(j,i) 3568 ! 3569 !-- Store number of surface elements in dummy additional surface element 3570 array_1d(is-1) = e_end_index(j,i) - e_start_index(j,i) + 1 3571 array_1d(is:ie) = data(m_start_index(j,i):m_end_index(j,i)) 3572 lo_index(target_thread(j,i)) = lo_index(target_thread(j,i)) + & 3573 e_end_index(j,i) - e_start_index(j,i) + 1 3601 3574 ENDDO 3602 ELSE 3603 ! array_1d => data !kk Did not work in all cases why??? 3604 ALLOCATE( array_1d( SIZE( data ) ) ) 3605 array_1d = data 3606 ENDIF 3607 3608 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 3575 ENDDO 3576 ! 3577 !-- On target PE, get data from source PEs which are assigned for output on this PE. 3578 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3579 3580 buf_start = 1 3581 DO n = 0, numprocs-1 3582 n_trans = transfer_index(4,n) 3583 IF ( n_trans > 0 ) THEN 3584 disp = transfer_index(3,n) - 1 3585 CALL MPI_GET( get_buffer(buf_start), n_trans, MPI_REAL, n, disp, n_trans, MPI_REAL, & 3586 win_surf, ierr ) 3587 buf_start = buf_start + n_trans 3588 ENDIF 3589 ENDDO 3590 3591 CALL MPI_WIN_FENCE( 0, win_surf, ierr ) 3592 ! 3593 !-- Copy data to output buffer. Here, the outpuf buffer matches the indices global_start and 3594 !-- global_end. 3595 ind_gb = 1 3596 DO i = 1, SIZE( local_indices, 2 ) 3597 ind_out = local_indices(1,i) 3598 DO j = 1, local_indices(2,i) 3599 array_out(ind_out) = get_buffer(ind_gb) 3600 ind_out = ind_out+1 3601 ind_gb = ind_gb+1 3602 ENDDO 3603 ENDDO 3604 3605 DEALLOCATE( get_buffer ) 3606 3607 ! 3608 !-- Write data to disk. 3609 CALL sm_io%sm_node_barrier() ! 
has no effect if I/O on limited number of cores is inactive 3609 3610 IF ( sm_io%iam_io_pe ) THEN 3610 IF ( all_pes_write ) THEN 3611 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_surf, 'native', MPI_INFO_NULL, & 3612 ierr ) 3613 CALL MPI_FILE_WRITE_ALL( fh, array_1d, nr_iope, MPI_REAL, status, ierr ) 3614 ELSE 3615 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 3616 IF ( nr_val > 0 ) THEN 3617 disp = array_position + 8 * ( glo_start - 1 ) 3618 CALL MPI_FILE_SEEK( fh, disp, MPI_SEEK_SET, ierr ) 3619 CALL MPI_FILE_WRITE( fh, array_1d, nr_iope, MPI_REAL, status, ierr ) 3620 ENDIF 3621 ENDIF 3611 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_surf, 'native', MPI_INFO_NULL, & 3612 ierr ) 3613 CALL MPI_FILE_WRITE_ALL( fh, array_out, SIZE( array_out ), MPI_REAL, status, ierr ) 3622 3614 ENDIF 3623 3615 CALL sm_io%sm_node_barrier() 3624 IF( .NOT. sm_io%is_sm_active() ) DEALLOCATE( array_1d ) 3625 #else 3616 #else 3617 DO i = nxl, nxr 3618 DO j = nys, nyn 3619 array_out(e_start_index(j,i)) = e_end_index(j,i) - e_start_index(j,i) + 1 3620 array_out(e_start_index(j,i)+1:e_end_index(j,i)) = & 3621 data(m_start_index(j,i):m_end_index(j,i)) 3622 ENDDO 3623 ENDDO 3624 3626 3625 CALL posix_lseek( fh, array_position ) 3627 CALL posix_write( fh, data, nr_val ) 3628 #endif 3626 CALL posix_write( fh, array_out, SIZE(array_out) ) 3627 #endif 3628 3629 3629 array_position = array_position + total_number_of_surface_values * wp 3630 3630 3631 ! IF ( lo_first_index == 1 ) THEN3632 ! IF ( debug_level >= 2 .AND. nr_val > 0 ) WRITE(9,*) 'w_surf_1 ', TRIM( name ), ' ', nr_val, SUM( data(1:nr_val) )3633 ! ELSE3634 ! IF ( debug_level >= 2 .AND. nr_val > 0 ) WRITE(9,*) 'w_surf_n ', TRIM( name ), ' ', &3635 ! lo_first_index, nr_val, SUM( data(1:nr_val) )3636 ! ENDIF3637 3638 3631 END SUBROUTINE wrd_mpi_io_surface 3639 3640 3632 3641 3633 … … 3690 3682 IF ( wr_flag .AND. sm_io%iam_io_pe ) THEN 3691 3683 3692 tgh%nr_int = header_int_index - 1 3693 tgh%nr_char = header_char_index - 1 3694 tgh%nr_real = header_real_index - 1 3695 tgh%nr_arrays = header_array_index - 1 3696 tgh%total_nx = iog%nx + 1 3697 tgh%total_ny = iog%ny + 1 3684 tgh%nr_int = header_int_index - 1 3685 tgh%nr_char = header_char_index - 1 3686 tgh%nr_real = header_real_index - 1 3687 tgh%nr_arrays = header_array_index - 1 3688 tgh%total_nx = iog%nx + 1 3689 tgh%total_ny = iog%ny + 1 3690 tgh%pes_along_x = pdims(1) 3691 tgh%pes_along_y = pdims(2) 3698 3692 IF ( include_total_domain_boundaries ) THEN ! Not sure, if LOGICAL interpretation is the same for all compilers, 3699 3693 tgh%i_outer_bound = 1 ! therefore store as INTEGER in general header … … 3802 3796 !-- Close MPI-IO files 3803 3797 #if defined( __parallel ) 3804 !3805 !-- Restart file has been opened with comm2d3806 IF ( fhs /= -1 ) THEN3807 CALL MPI_FILE_CLOSE( fhs, ierr )3808 ENDIF3809 3798 ! 3810 3799 !-- Free RMA windows … … 3816 3805 #endif 3817 3806 3818 IF (.NOT. pe_active_for_read ) RETURN 3807 IF ( ALLOCATED( e_start_index ) ) DEALLOCATE( e_start_index ) 3808 IF ( ALLOCATED( e_end_index ) ) DEALLOCATE( e_end_index ) 3809 IF ( ALLOCATED( m_start_index ) ) DEALLOCATE( m_start_index ) 3810 IF ( ALLOCATED( m_end_index ) ) DEALLOCATE( m_end_index ) 3811 IF ( ALLOCATED( target_thread ) ) DEALLOCATE( target_thread ) 3812 IF ( ALLOCATED( thread_index ) ) DEALLOCATE( thread_index ) 3813 IF ( ALLOCATED( thread_values ) ) DEALLOCATE( thread_values ) 3814 IF ( ALLOCATED( transfer_index ) ) DEALLOCATE( transfer_index ) 3815 3816 IF ( .NOT. 
pe_active_for_read ) RETURN 3819 3817 ! 3820 3818 !-- TODO: better explain the following message … … 3860 3858 !> data is not time critical (data size is comparably small), it will be read by all cores. 3861 3859 !--------------------------------------------------------------------------------------------------! 3862 SUBROUTINE rd_mpi_io_surface_filetypes( start_index, end_index, data_to_write, global_start ) 3860 RECURSIVE SUBROUTINE rd_mpi_io_surface_filetypes( start_index, end_index, data_to_write, & 3861 global_start, global_end ) 3863 3862 3864 3863 IMPLICIT NONE 3865 3864 3866 INTEGER(iwp) :: i !< loop index 3867 INTEGER(iwp) :: j !< loop index 3868 INTEGER(KIND=rd_offset_kind) :: offset !< 3869 3870 INTEGER(iwp), DIMENSION(1) :: dims1 !< 3871 INTEGER(iwp), DIMENSION(1) :: lize1 !< 3872 INTEGER(iwp), DIMENSION(1) :: start1 !< 3873 3874 INTEGER(iwp), DIMENSION(0:numprocs-1) :: all_nr_val !< number of values for all PEs 3875 INTEGER(iwp), DIMENSION(0:numprocs-1) :: lo_nr_val !< local number of values in x and y direction 3876 3877 3878 INTEGER, INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: end_index !< 3879 INTEGER, INTENT(OUT), DIMENSION(nys:nyn,nxl:nxr) :: global_start !< 3880 INTEGER, INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: start_index !< 3881 3882 LOGICAL, INTENT(OUT) :: data_to_write !< returns, if surface data have to be written 3883 3884 ! 3885 !-- Actions during reading 3886 IF ( rd_flag ) THEN 3887 ! 3888 !-- Set start index and end index for the mainrun grid. 3889 !-- ATTENTION: This works only for horizontal surfaces with one vale per grid cell!!! 3890 IF ( cyclic_fill_mode ) THEN 3891 DO i = nxl, nxr 3892 DO j = nys, nyn 3893 start_index (j,i) = (i-nxl) * nny + j - nys + 1 3894 end_index (j,i) = start_index(j,i) 3895 ENDDO 3865 INTEGER(iwp) :: e_lo_start !< 3866 INTEGER(iwp) :: i !< loop index 3867 INTEGER(iwp) :: j !< loop index 3868 INTEGER(iwp) :: index_offset !< 3869 INTEGER(iwp) :: last_end_index !< 3870 INTEGER(iwp) :: lo_start !< 3871 INTEGER(iwp) :: nr_surfcells_pe !< 3872 INTEGER(iwp) :: rest_cells_pe !< 3873 INTEGER(iwp) :: rest_bound !< 3874 #if defined( __parallel ) 3875 INTEGER(iwp) :: io_end_index !< 3876 INTEGER(iwp) :: io_start_index !< 3877 INTEGER(iwp) :: n !< loop index 3878 INTEGER(iwp) :: nr_previous !< 3879 #endif 3880 3881 INTEGER(iwp), DIMENSION(0:numprocs-1,2) :: nr_surfcells_all_s !< 3882 INTEGER(iwp), DIMENSION(0:numprocs-1,2) :: nr_surfcells_all_r !< 3883 #if defined( __parallel ) 3884 INTEGER(iwp), DIMENSION(1) :: dims1 !< global dimension for MPI_TYPE_CREATE_SUBARRAY 3885 INTEGER(iwp), DIMENSION(1) :: lsize1 !< local size for MPI_TYPE_CREATE_SUBARRAY 3886 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_cells_to_thread !< 3887 INTEGER(iwp), DIMENSION(0:pdims(1)) :: nr_surf_cells_x !< 3888 INTEGER(iwp), DIMENSION(0:pdims(1)) :: nr_surf_cells_x_s !< 3889 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_values_to_thread !< 3890 INTEGER(iwp), DIMENSION(1) :: start1 !< start index for MPI_TYPE_CREATE_SUBARRAY 3891 INTEGER(iwp), DIMENSION(nxl:nxr) :: sum_y !< 3892 #endif 3893 3894 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: end_index !< local end indx 3895 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: global_start !< global start index 3896 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: global_end !< global end index 3897 INTEGER(iwp), INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: start_index !< local start index 3898 #if defined( __parallel ) 3899 INTEGER(iwp), DIMENSION(0:myidy,nxl:nxr) :: nr_previous_y !< 3900 
INTEGER(iwp), DIMENSION(0:pdims(2),nxl:nxr) :: nr_surf_cells_y !< 3901 INTEGER(iwp), DIMENSION(0:pdims(2),nxl:nxr) :: nr_surf_cells_y_s !< 3902 INTEGER(iwp), DIMENSION(4,0:numprocs-1) :: transfer_index_s !< 3903 #endif 3904 3905 LOGICAL, INTENT(OUT) :: data_to_write !< returns .TRUE., if surface data have been written 3906 LOGICAL :: only_dummy_values !< only dummy values, i.e. no data to write 3907 3908 3909 ! 3910 !-- Clean up previous calls. 3911 #if defined( __parallel ) 3912 IF ( win_surf /= -1 ) THEN 3913 CALL MPI_WIN_FREE( win_surf, ierr ) 3914 DEALLOCATE( array_1d ) 3915 win_surf = -1 3916 ENDIF 3917 IF ( ft_surf /= -1 .AND. sm_io%iam_io_pe ) THEN 3918 CALL MPI_TYPE_FREE( ft_surf, ierr ) 3919 ENDIF 3920 ft_surf = -1 3921 IF ( sm_io%is_sm_active() ) THEN 3922 IF ( win_out /= -1 ) THEN 3923 CALL MPI_WIN_FREE( win_out, ierr ) 3924 win_out = -1 3925 ENDIF 3926 ELSE 3927 IF ( ASSOCIATED( array_out ) ) DEALLOCATE( array_out ) 3928 ENDIF 3929 #else 3930 IF ( ASSOCIATED( array_out ) ) DEALLOCATE( array_out ) 3931 #endif 3932 3933 IF ( cyclic_fill_mode ) THEN 3934 CALL cyclic_fill_surface_filetype 3935 RETURN 3936 ELSE 3937 IF ( .NOT. ALLOCATED( e_end_index ) ) ALLOCATE( e_end_index(nys:nyn,nxl:nxr) ) 3938 IF ( .NOT. ALLOCATED( e_start_index ) ) ALLOCATE( e_start_index(nys:nyn,nxl:nxr) ) 3939 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr) ) 3940 IF ( .NOT. ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) ) 3941 IF ( .NOT. ALLOCATED( target_thread ) ) ALLOCATE( target_thread(nys:nyn,nxl:nxr) ) 3942 IF ( .NOT. ALLOCATED( thread_index ) ) ALLOCATE( thread_index(0:numprocs-1) ) 3943 IF ( .NOT. ALLOCATED( thread_values ) ) ALLOCATE( thread_values(0:numprocs-1) ) 3944 IF ( .NOT. ALLOCATED( transfer_index ) ) ALLOCATE( transfer_index(4,0:numprocs-1) ) 3945 ENDIF 3946 3947 IF ( wr_flag) THEN 3948 ! 3949 !-- Add one dummy value at every grid box. 3950 !-- This allows to use MPI_FILE_WRITE_ALL and MPI_FILE_READ_ALL with subarray file type. 3951 index_offset = 0 3952 last_end_index = 0 3953 DO i = nxl, nxr 3954 DO j = nys, nyn 3955 e_start_index(j,i) = start_index (j,i) + index_offset 3956 IF ( end_index (j,i) - start_index(j,i) < 0 ) THEN 3957 e_end_index (j,i) = last_end_index+1 3958 last_end_index = last_end_index+1 3959 ELSE 3960 e_end_index (j,i) = end_index(j,i) + index_offset + 1 3961 last_end_index = e_end_index (j,i) 3962 ENDIF 3963 index_offset = index_offset + 1 3964 ENDDO 3965 ENDDO 3966 #if defined( __parallel ) 3967 ! 3968 !-- Compute indices for global, PE independent 1-d surface element array. 3969 nr_surf_cells_y_s = 0 3970 ! 3971 !-- Count number of surface elements in y-direction for every x. 3972 DO i = nxl, nxr 3973 nr_surf_cells_y_s(myidy,i) = SUM( e_end_index (:,i) - e_start_index (:,i) + 1 ) 3974 ENDDO 3975 ! 3976 !-- Distribute these values to all PEs along y. 3977 CALL MPI_ALLREDUCE( nr_surf_cells_y_s, nr_surf_cells_y, SIZE( nr_surf_cells_y ), & 3978 MPI_INTEGER, MPI_SUM, comm1dy, ierr ) 3979 ! 3980 !-- Sum all surface elements along y for individual x PEs 3981 nr_surf_cells_x_s = 0 3982 nr_surf_cells_x_s(myidx) = SUM( nr_surf_cells_y ) 3983 ! 3984 !-- Distribute to all PEs along x. 3985 CALL MPI_ALLREDUCE( nr_surf_cells_x_s, nr_surf_cells_x, SIZE( nr_surf_cells_x ), & 3986 MPI_INTEGER, MPI_SUM, comm1dx, ierr ) 3987 DO i = nxl, nxr 3988 nr_previous_y(:,i) = 0 3989 DO n = 1, myidy 3990 nr_previous_y(n,i) = nr_previous_y(n-1,i) + nr_surf_cells_y(n-1,i) 3896 3991 ENDDO 3897 ENDIF 3898 3899 IF ( .NOT. 
ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) ) 3900 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr) ) 3901 IF ( .NOT. ALLOCATED( m_global_start ) ) ALLOCATE( m_global_start(nys:nyn,nxl:nxr) ) 3902 ! 3903 !-- Save arrays for later reading 3904 m_start_index = start_index 3905 m_end_index = end_index 3906 m_global_start = global_start 3907 nr_val = MAXVAL( end_index ) 3908 3909 ENDIF 3910 3911 IF ( .NOT. pe_active_for_read ) RETURN 3912 3913 IF ( cyclic_fill_mode ) CALL prerun_grid%activate_grid_from_this_class() 3914 3915 offset = 0 3916 lo_nr_val= 0 3917 lo_nr_val(myid) = MAXVAL( end_index ) 3918 #if defined( __parallel ) 3919 CALL MPI_ALLREDUCE( lo_nr_val, all_nr_val, numprocs, MPI_INTEGER, MPI_SUM, comm2d, ierr ) 3920 IF ( ft_surf /= -1 .AND. sm_io%iam_io_pe ) THEN 3921 CALL MPI_TYPE_FREE( ft_surf, ierr ) ! If set, free last surface filetype 3922 ENDIF 3923 3924 IF ( win_surf /= -1 ) THEN 3925 IF ( sm_io%is_sm_active() ) THEN 3926 CALL MPI_WIN_FREE( win_surf, ierr ) 3927 ENDIF 3928 win_surf = -1 3929 ENDIF 3930 3931 IF ( sm_io%is_sm_active() .AND. rd_flag ) THEN 3932 IF ( fhs == -1 ) THEN 3933 CALL MPI_FILE_OPEN( comm2d, TRIM( io_file_name ), MPI_MODE_RDONLY, MPI_INFO_NULL, fhs, & 3934 ierr ) 3935 ENDIF 3992 ENDDO 3993 3994 sum_y(nxl) = SUM( nr_surf_cells_y(:,nxl) ) 3995 DO i = nxl, nxr 3996 IF ( i > nxl ) THEN 3997 sum_y(i) = sum_y(i-1) + SUM( nr_surf_cells_y(:,i) ) 3998 ENDIF 3999 ENDDO 4000 4001 nr_previous = 0 4002 IF ( myidx >= 1 ) THEN 4003 nr_previous = SUM(nr_surf_cells_x(0:myidx-1)) 4004 ENDIF 4005 4006 global_start(nys,nxl) = 1 + nr_previous + nr_previous_y(myidy,nxl) 4007 DO j = nys+1, nyn 4008 global_start(j,nxl) = global_start(j-1,nxl) + e_end_index(j-1,nxl) - & 4009 e_start_index(j-1,nxl) + 1 4010 ENDDO 4011 4012 DO i = nxl+1, nxr 4013 global_start(nys,i) = 1 + nr_previous + nr_previous_y(myidy,i) + sum_y(i-1) 4014 DO j = nys+1, nyn 4015 global_start(j,i) = global_start(j-1,i) + e_end_index(j-1,i) - e_start_index(j-1,i) + 1 4016 ENDDO 4017 ENDDO 4018 #else 4019 global_start = e_start_index 4020 #endif 4021 DO i = nxl, nxr 4022 DO j = nys, nyn 4023 global_end(j,i) = global_start(j,i) + e_end_index (j,i) - e_start_index (j,i) 4024 ENDDO 4025 ENDDO 4026 3936 4027 ELSE 3937 fhs = fh 3938 ENDIF 3939 #else 3940 all_nr_val(myid) = lo_nr_val(myid) 3941 #endif 3942 nr_val = lo_nr_val(myid) 4028 ! 4029 !-- In case of read, compute e_start_index and e_end_index for current processor grid. 4030 !-- This data contains one extra value for every i and j. 4031 e_lo_start = 1 4032 lo_start = 1 4033 DO i = nxl, nxr 4034 DO j = nys, nyn 4035 e_start_index(j,i) = e_lo_start 4036 e_end_index(j,i) = e_lo_start + global_end(j,i) - global_start(j,i) 4037 e_lo_start = e_lo_start + global_end(j,i) - global_start(j,i) + 1 4038 start_index(j,i) = lo_start 4039 end_index(j,i) = lo_start + global_end(j,i) - global_start(j,i) - 1 4040 lo_start = lo_start + global_end(j,i) - global_start(j,i) 4041 ENDDO 4042 ENDDO 4043 4044 ENDIF 4045 4046 nr_surfcells_all_s = 0 4047 nr_surfcells_all_s(myid,1) = MAXVAL( e_end_index ) ! 
don't split surface elements of one gridbox 4048 nr_surfcells_all_s(myid,2) = MAXVAL( e_end_index - e_start_index ) 4049 4050 #if defined( __parallel ) 4051 CALL MPI_ALLREDUCE( nr_surfcells_all_s, nr_surfcells_all_r, SIZE( nr_surfcells_all_s ), & 4052 MPI_INTEGER, MPI_SUM, comm2d, ierr ) 4053 #else 4054 nr_surfcells_all_r = nr_surfcells_all_s 4055 #endif 3943 4056 3944 4057 total_number_of_surface_values = 0 … … 3947 4060 glo_start = total_number_of_surface_values + 1 3948 4061 ENDIF 3949 total_number_of_surface_values = total_number_of_surface_values + all_nr_val(i)4062 total_number_of_surface_values = total_number_of_surface_values + nr_surfcells_all_r(i,1) 3950 4063 ENDDO 3951 3952 ! 3953 !-- Actions during reading 3954 IF ( rd_flag ) THEN 3955 3956 #if defined( __parallel ) 3957 CALL MPI_FILE_SET_VIEW( fhs, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 3958 #endif 3959 ENDIF 3960 3961 IF ( cyclic_fill_mode ) CALL mainrun_grid%activate_grid_from_this_class() 3962 3963 ! 3964 !-- Actions during writing 3965 IF ( wr_flag ) THEN 3966 ! 3967 !-- Create surface filetype 3968 ft_surf = -1 3969 global_start = start_index + glo_start - 1 3970 3971 WHERE ( end_index < start_index ) 3972 global_start = -1 3973 ENDWHERE 3974 3975 #if defined( __parallel ) 3976 IF ( sm_io%is_sm_active() ) THEN 3977 IF ( sm_io%iam_io_pe ) THEN 3978 ! 3979 !-- Calculate number of values of all PEs of an I/O group 3980 nr_iope = 0 3981 DO i = myid, myid+sm_io%sh_npes-1 3982 nr_iope = nr_iope + all_nr_val(i) 4064 only_dummy_values = ( MAXVAL( nr_surfcells_all_r(:,2) ) <= 0 ) 4065 4066 ! 4067 !-- Compute indices of equally distributed surface elements. 4068 !-- Number of surface values scheduled for ouput on this PE: 4069 nr_surfcells_pe = total_number_of_surface_values / numprocs 4070 rest_cells_pe = MOD( total_number_of_surface_values, numprocs ) 4071 rest_bound = rest_cells_pe * ( nr_surfcells_pe + 1 ) 4072 m_start_index = start_index 4073 m_end_index = end_index 4074 4075 ! 4076 !-- Compute number of elements on source PE, which have to be send to the corresponding target PE. 4077 #if defined( __parallel ) 4078 nr_cells_to_thread = 0 4079 nr_values_to_thread = 0 4080 DO i = nxl, nxr 4081 DO j = nys, nyn 4082 IF ( rest_cells_pe == 0 ) THEN 4083 target_thread(j,i) = ( global_start(j,i) - 1 ) / nr_surfcells_pe 4084 ELSE 4085 IF ( global_start(j,i) <= rest_bound ) THEN 4086 target_thread(j,i) = ( global_start(j,i) - 1 ) / ( nr_surfcells_pe + 1 ) 4087 ELSE 4088 target_thread(j,i) = ( global_start(j,i) - rest_bound - 1 ) / nr_surfcells_pe 4089 target_thread(j,i) = target_thread(j,i) + rest_cells_pe 4090 ENDIF 4091 ! 4092 !-- TODO: Test output, to be removed later. 4093 IF ( target_thread(j,i) >= numprocs ) THEN 4094 WRITE( 9,'(A,8I8)' ) 'target_thread ', j, i, target_thread(j,i), & 4095 global_start(j,i) , nr_surfcells_pe 4096 FLUSH( 9 ) 4097 CALL MPI_ABORT( comm2d, 1, ierr ) 4098 ENDIF 4099 ENDIF 4100 nr_cells_to_thread(target_thread(j,i)) = nr_cells_to_thread(target_thread(j,i)) + 1 4101 nr_values_to_thread(target_thread(j,i)) = nr_values_to_thread(target_thread(j,i)) + & 4102 e_end_index(j,i) - e_start_index(j,i) + 1 4103 ENDDO 4104 ENDDO 4105 4106 ! 4107 !-- Compute start index in the transfer buffer on the source side for the corresponding target PE. 4108 thread_index(0) = 1 4109 thread_values(0) = 1 4110 DO n = 1, numprocs-1 4111 thread_index(n) = thread_index(n-1) + nr_cells_to_thread(n-1) 4112 thread_values(n) = thread_values(n-1) + nr_values_to_thread(n-1) 4113 ENDDO 4114 ! 
4115 !-- Buffer distribution on the source side. 4116 DO n = 0, numprocs-1 4117 transfer_index_s(1,n) = thread_index(n) 4118 transfer_index_s(2,n) = nr_cells_to_thread(n) 4119 transfer_index_s(3,n) = thread_values(n) 4120 transfer_index_s(4,n) = nr_values_to_thread(n) 4121 ENDDO 4122 4123 CALL MPI_ALLTOALL( transfer_index_s, 4, MPI_INTEGER, transfer_index, 4, MPI_INTEGER, comm2d, & 4124 ierr) 4125 ! 4126 !-- Buffer distribution on the target side side. 4127 CALL get_remote_indices() 4128 ! 4129 !-- Create surface element file type. 4130 IF ( total_number_of_surface_values > 0 .AND. .NOT. only_dummy_values) THEN 4131 data_to_write = .TRUE. 4132 ELSE 4133 data_to_write = .FALSE. 4134 ENDIF 4135 4136 CALL MPI_ALLREDUCE( global_end(nyn,nxr), dims1(1), 1, MPI_INTEGER, MPI_MAX, comm2d, ierr ) 4137 start1(1) = MINVAL( local_indices(1,:) ) - 1 4138 IF ( sm_io%is_sm_active() ) THEN 4139 CALL MPI_ALLREDUCE( SUM( local_indices(2,:) ), lsize1(1), 1, MPI_INTEGER, MPI_SUM, & 4140 sm_io%comm_shared, ierr ) 4141 ELSE 4142 lsize1(1) = SUM( local_indices(2,:) ) 4143 ENDIF 4144 4145 IF ( sm_io%iam_io_pe ) THEN 4146 IF ( total_number_of_surface_values > 0 ) THEN 4147 CALL MPI_TYPE_CREATE_SUBARRAY( 1, dims1, lsize1, start1, MPI_ORDER_FORTRAN, MPI_REAL, & 4148 ft_surf, ierr ) 4149 CALL MPI_TYPE_COMMIT( ft_surf, ierr ) 4150 ENDIF 4151 ENDIF 4152 ! 4153 !-- Allocate rma window to supply surface data to other PEs. 4154 CALL rd_alloc_rma_mem( array_1d, SUM( nr_values_to_thread ), win_surf ) 4155 ! 4156 !-- Allocate shared array on IO-PE to supply data for MPI-IO (write or read). 4157 IF ( sm_io%is_sm_active() ) THEN 4158 IF ( sm_io%iam_io_pe ) THEN 4159 io_start_index = start1(1) + 1 4160 io_end_index = start1(1) + lsize1(1) 4161 ENDIF 4162 CALL MPI_BCAST( io_start_index, 1, MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 4163 CALL MPI_BCAST( io_end_index, 1, MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 4164 CALL sm_io%sm_allocate_shared( array_out, io_start_index, io_end_index, win_out ) 4165 ELSE 4166 ALLOCATE( array_out(start1(1)+1:start1(1)+lsize1(1)) ) 4167 ENDIF 4168 #else 4169 IF ( total_number_of_surface_values > 0 .AND. .NOT. only_dummy_values ) THEN 4170 data_to_write = .TRUE. 4171 ELSE 4172 data_to_write = .FALSE. 4173 ENDIF 4174 ALLOCATE( array_out(1:total_number_of_surface_values) ) 4175 #endif 4176 4177 CONTAINS 4178 4179 SUBROUTINE cyclic_fill_surface_filetype 4180 4181 INTEGER(iwp) :: i !< loop index 4182 INTEGER(iwp) :: j !< loop index 4183 4184 4185 IF ( .NOT. ALLOCATED( o_start_index ) ) ALLOCATE( o_start_index(nys:nyn,nxl:nxr) ) 4186 IF ( .NOT. ALLOCATED( o_end_index ) ) ALLOCATE( o_end_index(nys:nyn,nxl:nxr) ) 4187 4188 lo_start = 1 4189 DO i = nxl, nxr 4190 DO j = nys, nyn 4191 o_start_index(j,i) = lo_start 4192 o_end_index(j,i) = lo_start 4193 lo_start = lo_start + 1 4194 ENDDO 4195 ENDDO 4196 start_index = o_start_index 4197 end_index = o_end_index 4198 4199 IF ( MAXVAL( global_end-global_start ) > 1 ) THEN 4200 message_string = 'cylic-fill method does not allow more than one surface element ' // & 4201 'per grid box' 4202 CALL message( 'cyclic_fill_surface_filetype', 'PA0742', 1, 2, 0, 6, 0 ) 4203 ENDIF 4204 ! 4205 !-- Activate grid of the smaller prerun, i.e. grid variables like nxl, nxr, nys, nyn and others 4206 !-- are set according to the prerun layout. 4207 CALL prerun_grid%activate_grid_from_this_class() 4208 4209 IF ( pe_active_for_read ) THEN 4210 4211 IF ( .NOT. ALLOCATED( c_global_start ) ) ALLOCATE( c_global_start(nys:nyn,nxl:nxr) ) 4212 IF ( .NOT. 
ALLOCATED( c_global_end ) ) ALLOCATE( c_global_end(nys:nyn,nxl:nxr) ) 4213 IF ( .NOT. ALLOCATED( c_start_index ) ) ALLOCATE( c_start_index(nys:nyn,nxl:nxr) ) 4214 IF ( .NOT. ALLOCATED( c_end_index ) ) ALLOCATE( c_end_index(nys:nyn,nxl:nxr) ) 4215 4216 DO i = nxl, nxr 4217 DO j = nys, nyn 4218 c_global_start(j,i) = global_start(j,i) 4219 c_global_end(j,i) = global_end(j,i) 3983 4220 ENDDO 3984 ELSE 3985 local_start = 0 3986 DO i = myid-sm_io%sh_rank, myid-1 3987 local_start = local_start + all_nr_val(i) 3988 ENDDO 4221 ENDDO 4222 ! 4223 !-- Recursive call of rd_mpi_io_surface_filetypes. 4224 !-- Prerun data are read, but they are treated as if they are mainrun data, just on a smaller 4225 !-- grid. 4226 cyclic_fill_mode = .FALSE. 4227 CALL rd_mpi_io_surface_filetypes( c_start_index, c_end_index, data_to_write, & 4228 c_global_start, c_global_end ) 4229 cyclic_fill_mode = .TRUE. 4230 4231 ENDIF 4232 ! 4233 !-- Activate grid of the mainrun, i.e. grid variables like nxl, nxr, nys, nyn and others 4234 !-- are set according to the mainrun layout. 4235 CALL mainrun_grid%activate_grid_from_this_class() 4236 4237 #if defined( __parallel ) 4238 CALL MPI_BCAST( data_to_write, 1, MPI_LOGICAL, 0, comm2d, ierr ) 4239 #endif 4240 4241 END SUBROUTINE cyclic_fill_surface_filetype 4242 4243 #if defined( __parallel ) 4244 ! 4245 !-- Get the indices of the surface elements inside the RMA window on the remote PE. 4246 !-- This information is required to fetch the surface element data on remote PEs 4247 !-- in rrd_mpi_io_surface and wrd_mpi_io_surface. 4248 SUBROUTINE get_remote_indices 4249 4250 INTEGER(iwp) :: buf_start !< 4251 INTEGER(iwp) :: i !< 4252 INTEGER(iwp) :: j !< 4253 INTEGER(iwp) :: n !< 4254 INTEGER(iwp) :: n_trans !< 4255 INTEGER(iwp) :: win_ind !< 4256 4257 INTEGER(KIND=MPI_ADDRESS_KIND) :: disp !< displacement in RMA window 4258 INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize !< size of RMA window 4259 4260 INTEGER(iwp), DIMENSION(0:numprocs-1) :: lo_index !< 4261 4262 INTEGER(iwp), POINTER, DIMENSION(:,:) :: surf_val_index !< 4263 4264 4265 IF ( ALLOCATED( local_indices ) ) DEALLOCATE( local_indices ) 4266 ALLOCATE( local_indices(2,MAX( SUM( transfer_index(2,:) ), 2 ))) 4267 4268 local_indices(1,:) = 0 4269 local_indices(2,:) = 0 4270 4271 winsize = MAX( 2 * SUM( nr_cells_to_thread ), 2 ) 4272 4273 ALLOCATE( surf_val_index(2,winsize) ) 4274 winsize = winsize * iwp 4275 CALL MPI_WIN_CREATE( surf_val_index, winsize, iwp, MPI_INFO_NULL, comm2d, win_ind, ierr ) 4276 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4277 4278 lo_index = thread_index 4279 DO i = nxl, nxr 4280 DO j = nys, nyn 4281 surf_val_index(1,lo_index(target_thread(j,i))) = global_start(j,i) 4282 surf_val_index(2,lo_index(target_thread(j,i))) = global_end(j,i) - global_start(j,i) & 4283 + 1 4284 lo_index(target_thread(j,i)) = lo_index(target_thread(j,i)) + 1 4285 ENDDO 4286 ENDDO 4287 4288 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4289 4290 buf_start = 1 4291 DO n = 0, numprocs-1 4292 n_trans = transfer_index(2,n) 4293 IF ( n_trans > 0 ) THEN 4294 disp = 2 * ( transfer_index(1,n) - 1 ) 4295 CALL MPI_GET( local_indices(1,buf_start), 2*n_trans, MPI_INTEGER, n, disp, 2*n_trans, & 4296 MPI_INTEGER, win_ind, ierr ) 4297 buf_start = buf_start + n_trans 3989 4298 ENDIF 3990 ! 
3991 !-- Get the size of shared memory window on all PEs 3992 CALL MPI_BCAST( nr_iope, 1, MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 3993 CALL sm_io%sm_allocate_shared( array_1d, 1, MAX( 1, nr_iope ), win_surf ) 3994 ELSE 3995 nr_iope = nr_val 3996 ENDIF 3997 #else 3998 nr_iope = nr_val 3999 #endif 4000 4001 ! 4002 !-- Check, if surface data exist on this PE 4003 data_to_write = .TRUE. 4004 IF ( total_number_of_surface_values == 0 ) THEN 4005 data_to_write = .FALSE. 4006 RETURN 4007 ENDIF 4008 4009 IF ( sm_io%iam_io_pe ) THEN 4010 4011 all_pes_write = ( MINVAL( all_nr_val ) > 0 ) 4012 4013 IF ( all_pes_write ) THEN 4014 dims1(1) = total_number_of_surface_values 4015 lize1(1) = nr_iope 4016 start1(1) = glo_start-1 4017 4018 #if defined( __parallel ) 4019 IF ( total_number_of_surface_values > 0 ) THEN 4020 CALL MPI_TYPE_CREATE_SUBARRAY( 1, dims1, lize1, start1, MPI_ORDER_FORTRAN, & 4021 MPI_REAL, ft_surf, ierr ) 4022 CALL MPI_TYPE_COMMIT( ft_surf, ierr ) 4023 ENDIF 4024 #endif 4299 ENDDO 4300 4301 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4302 4303 buf_start = 1 4304 DO n = 0, numprocs-1 4305 n_trans = transfer_index(2,n) 4306 IF ( n_trans > 0 ) THEN 4307 disp = transfer_index(1,n) - 1 4308 buf_start = buf_start + n_trans 4025 4309 ENDIF 4026 4310 ENDDO 4027 4311 4028 ENDIF 4312 CALL MPI_WIN_FREE( win_ind, ierr ) 4313 4314 DEALLOCATE( surf_val_index ) 4315 4316 END SUBROUTINE get_remote_indices 4317 4318 !--------------------------------------------------------------------------------------------------! 4319 ! Description: 4320 ! ------------ 4321 !> Allocate memory and create window for one-sided communication (1-d REAL array) 4322 !--------------------------------------------------------------------------------------------------! 4323 SUBROUTINE rd_alloc_rma_mem( array, idim, win ) 4324 4325 IMPLICIT NONE 4326 4327 INTEGER(iwp), INTENT(IN) :: idim !< Dimension of this 1-D array 4328 INTEGER :: ierr !< MPI error code 4329 INTEGER(iwp), INTENT(OUT) :: win !< MPI window 4330 INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize !< size of RMA window 4331 4332 REAL(wp), DIMENSION(:), POINTER, INTENT(INOUT) :: array !< array to access RMA window locally 4333 4334 4335 winsize = MAX( idim, 2 ) 4336 ALLOCATE( array(winsize) ) 4337 winsize = winsize * wp 4338 CALL MPI_WIN_CREATE( array, winsize, wp, MPI_INFO_NULL, comm2d, win, ierr ) 4339 array = -1 4340 CALL MPI_WIN_FENCE( 0, win, ierr ) 4341 4342 END SUBROUTINE rd_alloc_rma_mem 4343 #endif 4029 4344 4030 4345 END SUBROUTINE rd_mpi_io_surface_filetypes 4031 4032 4346 4033 4347 … … 4079 4393 iog%nnx = iog%nnx + nbgp 4080 4394 ENDIF 4081 IF ( myidx == npex-1 .OR. npex == -1 ) THEN ! npex == 1 if -D__parallel not set4395 IF ( myidx == pdims(1)-1 ) THEN 4082 4396 iog%nxr = iog%nxr + nbgp 4083 4397 iog%nnx = iog%nnx + nbgp … … 4087 4401 iog%nny = iog%nny + nbgp 4088 4402 ENDIF 4089 IF ( myidy == npey-1 .OR. npey == -1 ) THEN ! npey == 1 if -D__parallel not set4403 IF ( myidy == pdims(2)-1 ) THEN 4090 4404 iog%nyn = iog%nyn + nbgp 4091 4405 iog%nny = iog%nny + nbgp … … 4251 4565 iog%nnx = iog%nnx + nbgp 4252 4566 ENDIF 4253 IF ( myidx == npex-1 .OR. npex == -1 ) THEN ! npex == 1 if -D__parallel not set4567 IF ( myidx == pdims(1)-1 ) THEN 4254 4568 iog%nxr = iog%nxr + nbgp 4255 4569 iog%nnx = iog%nnx + nbgp … … 4259 4573 iog%nny = iog%nny + nbgp 4260 4574 ENDIF 4261 IF ( myidy == npey-1 .OR. npey == -1 ) THEN ! 
npey == 1 if -D__parallel not set4575 IF ( myidy == pdims(2)-1 ) THEN 4262 4576 iog%nyn = iog%nyn + nbgp 4263 4577 iog%nny = iog%nny + nbgp … … 4326 4640 !> to a single file that contains the global arrays. It is not required for the serial mode. 4327 4641 !--------------------------------------------------------------------------------------------------! 4328 #if defined( __parallel )4329 4642 SUBROUTINE rd_mpi_io_create_filetypes_3dsoil( nzb_soil, nzt_soil ) 4330 4643 … … 4334 4647 INTEGER, INTENT(IN) :: nzt_soil !< 4335 4648 4649 #if defined( __parallel ) 4336 4650 INTEGER, DIMENSION(3) :: dims3 !< 4337 4651 INTEGER, DIMENSION(3) :: lize3 !< … … 4367 4681 CALL MPI_TYPE_COMMIT( ft_3dsoil, ierr ) 4368 4682 ENDIF 4683 #else 4684 ALLOCATE( array_3d_soil(nzb_soil:nzt_soil,iog%nxl:iog%nxr,iog%nys:iog%nyn) ) 4685 sm_io%io_grid = iog 4686 #endif 4369 4687 4370 4688 END SUBROUTINE rd_mpi_io_create_filetypes_3dsoil 4371 #endif4372 4373 4374 4689 4375 4690 !--------------------------------------------------------------------------------------------------! … … 4381 4696 4382 4697 IMPLICIT NONE 4383 4384 4698 4385 4699 #if defined( __parallel ) … … 4401 4715 4402 4716 ENDIF 4717 4403 4718 ! 4404 4719 !-- Free last surface filetype … … 4415 4730 IF ( sm_io%iam_io_pe .AND. ft_3di4 /= -1 ) THEN 4416 4731 CALL MPI_TYPE_FREE( ft_3di4, ierr ) 4732 ft_3di4 = -1 4733 ENDIF 4734 IF ( sm_io%iam_io_pe .AND. ft_3di8 /= -1 ) THEN 4417 4735 CALL MPI_TYPE_FREE( ft_3di8, ierr ) 4736 ft_3di8 = -1 4418 4737 ENDIF 4419 4738 4420 4739 IF ( sm_io%is_sm_active() .AND. win_3di4 /= -1 ) THEN 4421 4740 CALL sm_io%sm_free_shared( win_3di4 ) 4741 win_3di4 = -1 4742 ENDIF 4743 IF ( sm_io%is_sm_active() .AND. win_3di8 /= -1 ) THEN 4422 4744 CALL sm_io%sm_free_shared( win_3di8 ) 4745 win_3di8 = -1 4746 ENDIF 4747 4748 IF ( win_start /= -1 ) THEN 4749 CALL sm_io%sm_free_shared( win_start) 4750 CALL sm_io%sm_free_shared( win_end) 4751 CALL sm_io%sm_free_shared( win_glost) 4752 win_start = -1 4753 win_end = -1 4754 win_glost = -1 4423 4755 ENDIF 4424 4756 … … 4426 4758 win_surf = -1 4427 4759 #else 4428 IF ( ASSOCIATED( array_2d) ) DEALLOCATE( array_2d )4429 IF ( ASSOCIATED( array_2di) ) DEALLOCATE( array_2di )4430 IF ( ASSOCIATED( array_3d) ) DEALLOCATE( array_3d )4431 IF ( ASSOCIATED( array_3di4) ) DEALLOCATE( array_3di4 )4432 IF ( ASSOCIATED( array_3di8) ) DEALLOCATE( array_3di8 )4760 IF ( ASSOCIATED( array_2d ) ) DEALLOCATE( array_2d ) 4761 IF ( ASSOCIATED( array_2di ) ) DEALLOCATE( array_2di ) 4762 IF ( ASSOCIATED( array_3d ) ) DEALLOCATE( array_3d ) 4763 IF ( ASSOCIATED( array_3di4 ) ) DEALLOCATE( array_3di4 ) 4764 IF ( ASSOCIATED( array_3di8 ) ) DEALLOCATE( array_3di8 ) 4433 4765 #endif 4434 4766
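
The central change of this revision is that the globally numbered surface values are redistributed evenly over all PEs before the collective MPI-IO write, using only integer arithmetic on the global start index. The following standalone sketch is not part of the changeset; it reproduces the target-PE computation used in rd_mpi_io_surface_filetypes above, with made-up values for numprocs and total_number_of_surface_values, so the mapping can be inspected in isolation.

   PROGRAM distribution_sketch
   !
   !-- Illustrative only: maps global surface-value indices onto target PEs in the same way as
   !-- the mainrun branch of rd_mpi_io_surface_filetypes. The two PARAMETER values are test
   !-- assumptions, not taken from the module.
      IMPLICIT NONE

      INTEGER, PARAMETER :: numprocs = 7                          !< assumed number of PEs
      INTEGER, PARAMETER :: total_number_of_surface_values = 23   !< assumed global value count

      INTEGER :: gs               !< global start index of a surface element (1-based)
      INTEGER :: nr_surfcells_pe  !< minimum number of values per PE
      INTEGER :: rest_cells_pe    !< number of PEs that receive one extra value
      INTEGER :: rest_bound       !< last global index handled by the "larger" PEs
      INTEGER :: target_thread    !< PE that will write this value

      nr_surfcells_pe = total_number_of_surface_values / numprocs
      rest_cells_pe   = MOD( total_number_of_surface_values, numprocs )
      rest_bound      = rest_cells_pe * ( nr_surfcells_pe + 1 )

      DO gs = 1, total_number_of_surface_values
         IF ( rest_cells_pe == 0 )  THEN
            target_thread = ( gs - 1 ) / nr_surfcells_pe
         ELSEIF ( gs <= rest_bound )  THEN
            target_thread = ( gs - 1 ) / ( nr_surfcells_pe + 1 )
         ELSE
            target_thread = rest_cells_pe + ( gs - rest_bound - 1 ) / nr_surfcells_pe
         ENDIF
         WRITE( *, '(A,I4,A,I3)' )  'global index ', gs, '  -> PE ', target_thread
      ENDDO

   END PROGRAM distribution_sketch

With 23 values on 7 PEs, the first two PEs receive 4 values each and the remaining five PEs receive 3 each, so no PE differs by more than one value from any other.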
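Both get_remote_indices and rd_alloc_rma_mem rely on MPI active-target one-sided communication: a window is created over a local buffer, a fence opens the exposure epoch, MPI_GET fetches remote data, and a closing fence guarantees completion. The minimal, self-contained sketch below shows only that pattern; it is not part of the changeset, and the rank count, window contents and neighbour choice are arbitrary illustration values.

   PROGRAM rma_fetch_sketch
   !
   !-- Illustrative only: every PE exposes one integer (its rank) in an RMA window and fetches
   !-- the value of its right neighbour with MPI_GET between two fences.
      USE MPI
      IMPLICIT NONE

      INTEGER :: ierr, myid, numprocs, win, right
      INTEGER :: my_value(1), remote_value(1)
      INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize, disp

      CALL MPI_INIT( ierr )
      CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )
      CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ierr )

      my_value(1) = myid
      winsize     = 4                 ! window holds one default INTEGER (4 bytes assumed)
   !
   !-- Expose my_value to all other PEs
      CALL MPI_WIN_CREATE( my_value, winsize, 4, MPI_INFO_NULL, MPI_COMM_WORLD, win, ierr )
      CALL MPI_WIN_FENCE( 0, win, ierr )
   !
   !-- One-sided fetch from the right neighbour (displacement 0 inside its window)
      right = MOD( myid + 1, numprocs )
      disp  = 0
      CALL MPI_GET( remote_value, 1, MPI_INTEGER, right, disp, 1, MPI_INTEGER, win, ierr )
   !
   !-- The remote value is guaranteed to have arrived only after the closing fence
      CALL MPI_WIN_FENCE( 0, win, ierr )

      WRITE( *, '(A,I5,A,I5)' )  'PE ', myid, ' fetched value ', remote_value(1)

      CALL MPI_WIN_FREE( win, ierr )
      CALL MPI_FINALIZE( ierr )

   END PROGRAM rma_fetch_sketch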