Changeset 4896 for palm/trunk/SOURCE/restart_data_mpi_io_mod.f90
- Timestamp:
- Mar 3, 2021 4:10:18 PM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/restart_data_mpi_io_mod.f90
r4893 r4896 25 25 ! ----------------- 26 26 ! $Id$ 27 ! more meaningful variable names assigned, 28 ! redundant code removed 29 ! 30 ! 4893 2021-03-02 16:39:14Z raasch 27 31 ! revised output of surface data via MPI-IO for better performance 28 32 ! … … 206 210 INTEGER(iwp) :: win_surf = -1 !< 207 211 #endif 208 INTEGER(iwp) :: total_number_of_surface_ values !< total number of values for one variable212 INTEGER(iwp) :: total_number_of_surface_elements !< total number of surface elements for one variable 209 213 210 214 INTEGER(KIND=rd_offset_kind) :: array_position !< … … 213 217 INTEGER(iwp), DIMENSION(:,:), POINTER, CONTIGUOUS :: array_2di !< 214 218 215 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_end_index !< extended end index, every grid cell has at least one value216 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_start_index !<217 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_end_index !< module copy of end_index218 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_start_index !<219 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: thread_index !<220 INTEGER(iwp), DIMENSION(: ,:), ALLOCATABLE :: target_thread !<221 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: transfer_index !<222 INTEGER(iwp), DIMENSION(: ), ALLOCATABLE :: thread_values!<219 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_end_index !< extended end index, every grid cell has at least one value 220 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: e_start_index !< 221 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_end_index !< module copy of end_index 222 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: m_start_index !< 223 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: nr_surfaces_in_tb !< number of surface elements in transfer buffer 224 INTEGER(iwp), DIMENSION(:), ALLOCATABLE :: s_index_in_tb !< start index of surface elements in transfer buffer 225 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: s_index_in_window !< start index for the pe in the rma window 226 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: transfer_index !< 223 227 ! 224 228 !-- Indices for cyclic fill … … 334 338 PRIVATE 335 339 336 PUBLIC restart_file_size, total_number_of_surface_ values340 PUBLIC restart_file_size, total_number_of_surface_elements 337 341 338 342 ! … … 3107 3111 IF ( TRIM( array_names(i) ) == TRIM( name ) ) THEN 3108 3112 ! 3109 !-- ATTENTION: The total_number_of_surface_ values and wp MUST be INTERGER(8).3110 !-- The compiler (at least Intel) first computes total_number_of_surface_ values*wp3113 !-- ATTENTION: The total_number_of_surface_elements and wp MUST be INTERGER(8). 3114 !-- The compiler (at least Intel) first computes total_number_of_surface_elements*wp 3111 3115 !-- and then does the conversion to INTEGER(8). 3112 !-- This may lead to wrong results when total_number_of_surface_ values*wp is > 2*10**63116 !-- This may lead to wrong results when total_number_of_surface_elements*wp is > 2*10**6 3113 3117 array_position = array_offset(i) + ( lo_first_index - 1 ) * & 3114 INT( total_number_of_surface_ values, idp ) * INT( wp, idp )3118 INT( total_number_of_surface_elements, idp ) * INT( wp, idp ) 3115 3119 found = .TRUE. 3116 3120 EXIT … … 3121 3125 !-- In case of 2d-data, name is written only once 3122 3126 IF ( lo_first_index == 1 ) THEN 3123 3124 IF ( header_array_index == max_nr_arrays ) THEN3125 message_string = 'maximum number of 2d/3d-array entries in restart file header exceeded'3126 CALL message( 'rrd_mpi_io_surface', 'PA0585', 1, 2, 0, 6, 0 )3127 ENDIF3128 3127 3129 3128 array_names(header_array_index) = name … … 3184 3183 ! 3185 3184 !-- Copy from RMA window into output array (data) to allow transfering data to target PEs. 3186 !-- Check, if the number of surface values per grid cellmatch the index setup.3187 lo_index = thread_values3185 !-- Check, if the number of surface elements per horizontal gridbox match the index setup. 3186 lo_index = nr_surfaces_in_tb 3188 3187 DO i = nxl, nxr 3189 3188 DO j = nys, nyn 3190 is = lo_index( target_thread(j,i)) + 13189 is = lo_index(s_index_in_window(j,i)) + 1 3191 3190 ie = is + m_end_index(j,i) - m_start_index(j,i) 3192 3191 data(m_start_index(j,i):m_end_index(j,i)) = array_1d(is:ie) 3193 lo_index( target_thread(j,i)) = lo_index(target_thread(j,i)) +&3194 e_end_index(j,i) - e_start_index(j,i) + 13192 lo_index(s_index_in_window(j,i)) = lo_index(s_index_in_window(j,i)) + & 3193 e_end_index(j,i) - e_start_index(j,i) + 1 3195 3194 ! 3196 3195 !-- TODO: Test can be removed later. 3197 3196 IF ( e_end_index(j,i)-e_start_index(j,i)+1 /= NINT( array_1d(is-1) ) ) THEN 3198 WRITE( 9, '(A,6I8)' ) 'Nr surface values does not match ', j, i,&3197 WRITE( 9, '(A,6I8)' ) 'Nr surface elements does not match ', j, i, & 3199 3198 e_start_index(j,i), e_end_index(j,i), & 3200 3199 e_end_index(j,i)-e_start_index(j,i)+1 , & … … 3219 3218 IF ( e_end_index(j,i)-e_start_index(j,i)+1 /= NINT(array_out(e_start_index(j,i))) )& 3220 3219 THEN 3221 WRITE( 9, '(A,6I8)' ) 'Nr surface values does not match ', j, i,&3220 WRITE( 9, '(A,6I8)' ) 'Nr surface elements does not match ', j, i, & 3222 3221 e_start_index(j,i), e_end_index(j,i), & 3223 3222 e_end_index(j,i)-e_start_index(j,i)+1, & … … 3559 3558 ! 3560 3559 !-- Copy from input array (data) to RMA window to allow the target PEs to get the appropiate data. 3561 !-- At this point, a dummy surface element is added. This makes sure that every x-y grid cellowns3560 !-- At this point, a dummy surface element is added. This makes sure that every x-y gridbox owns 3562 3561 !-- at least one surface element. This way, bookkeeping becomes much easier. 3563 lo_index = thread_values3562 lo_index = nr_surfaces_in_tb 3564 3563 DO i = nxl, nxr 3565 3564 DO j = nys, nyn 3566 is = lo_index( target_thread(j,i)) + 13565 is = lo_index(s_index_in_window(j,i)) + 1 3567 3566 ie = is + m_end_index(j,i) - m_start_index(j,i) 3568 3567 ! … … 3570 3569 array_1d(is-1) = e_end_index(j,i) - e_start_index(j,i) + 1 3571 3570 array_1d(is:ie) = data(m_start_index(j,i):m_end_index(j,i)) 3572 lo_index( target_thread(j,i)) = lo_index(target_thread(j,i)) +&3573 e_end_index(j,i) - e_start_index(j,i) + 13571 lo_index(s_index_in_window(j,i)) = lo_index(s_index_in_window(j,i)) + & 3572 e_end_index(j,i) - e_start_index(j,i) + 1 3574 3573 ENDDO 3575 3574 ENDDO … … 3627 3626 #endif 3628 3627 3629 array_position = array_position + total_number_of_surface_ values * wp3628 array_position = array_position + total_number_of_surface_elements * wp 3630 3629 3631 3630 END SUBROUTINE wrd_mpi_io_surface … … 3805 3804 #endif 3806 3805 3807 IF ( ALLOCATED( e_start_index ) ) DEALLOCATE( e_start_index)3808 IF ( ALLOCATED( e_end_index ) ) DEALLOCATE( e_end_index)3809 IF ( ALLOCATED( m_start_index ) ) DEALLOCATE( m_start_index)3810 IF ( ALLOCATED( m_end_index ) ) DEALLOCATE( m_end_index)3811 IF ( ALLOCATED( target_thread ) ) DEALLOCATE( target_thread)3812 IF ( ALLOCATED( thread_index ) ) DEALLOCATE( thread_index)3813 IF ( ALLOCATED( thread_values ) ) DEALLOCATE( thread_values)3814 IF ( ALLOCATED( transfer_index ) ) DEALLOCATE( transfer_index)3806 IF ( ALLOCATED( e_start_index ) ) DEALLOCATE( e_start_index ) 3807 IF ( ALLOCATED( e_end_index ) ) DEALLOCATE( e_end_index ) 3808 IF ( ALLOCATED( m_start_index ) ) DEALLOCATE( m_start_index ) 3809 IF ( ALLOCATED( m_end_index ) ) DEALLOCATE( m_end_index ) 3810 IF ( ALLOCATED( nr_surfaces_in_tb ) ) DEALLOCATE( nr_surfaces_in_tb ) 3811 IF ( ALLOCATED( s_index_in_tb ) ) DEALLOCATE( s_index_in_tb ) 3812 IF ( ALLOCATED( s_index_in_window ) ) DEALLOCATE( s_index_in_window ) 3813 IF ( ALLOCATED( transfer_index ) ) DEALLOCATE( transfer_index ) 3815 3814 3816 3815 IF ( .NOT. pe_active_for_read ) RETURN … … 3884 3883 INTEGER(iwp), DIMENSION(1) :: dims1 !< global dimension for MPI_TYPE_CREATE_SUBARRAY 3885 3884 INTEGER(iwp), DIMENSION(1) :: lsize1 !< local size for MPI_TYPE_CREATE_SUBARRAY 3886 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_cells_to_thread !< 3885 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_gp_with_surfaces_for_pe !< number of horizontal gridpoints containing surface elements for the respective pe 3886 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_surface_elements_for_pe !< total number of surface elements for the respective pe 3887 3887 INTEGER(iwp), DIMENSION(0:pdims(1)) :: nr_surf_cells_x !< 3888 3888 INTEGER(iwp), DIMENSION(0:pdims(1)) :: nr_surf_cells_x_s !< 3889 INTEGER(iwp), DIMENSION(0:numprocs-1) :: nr_values_to_thread !<3890 3889 INTEGER(iwp), DIMENSION(1) :: start1 !< start index for MPI_TYPE_CREATE_SUBARRAY 3891 3890 INTEGER(iwp), DIMENSION(nxl:nxr) :: sum_y !< … … 3903 3902 #endif 3904 3903 3905 LOGICAL, INTENT(OUT) :: data_to_write !< returns .TRUE., if surface data have been written3906 LOGICAL :: only_dummy_ values !< only dummy values, i.e. no data to write3904 LOGICAL, INTENT(OUT) :: data_to_write !< returns .TRUE., if surface data have been written 3905 LOGICAL :: only_dummy_elements !< only dummy elements, i.e. no data to write 3907 3906 3908 3907 … … 3935 3934 RETURN 3936 3935 ELSE 3937 IF ( .NOT. ALLOCATED( e_end_index ) ) ALLOCATE( e_end_index(nys:nyn,nxl:nxr))3938 IF ( .NOT. ALLOCATED( e_start_index ) ) ALLOCATE( e_start_index(nys:nyn,nxl:nxr) )3939 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr))3940 IF ( .NOT. ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) )3941 IF ( .NOT. ALLOCATED( target_thread ) ) ALLOCATE( target_thread(nys:nyn,nxl:nxr) )3942 IF ( .NOT. ALLOCATED( thread_index ) ) ALLOCATE( thread_index(0:numprocs-1))3943 IF ( .NOT. ALLOCATED( thread_values ) ) ALLOCATE( thread_values(0:numprocs-1))3944 IF ( .NOT. ALLOCATED( transfer_index ) ) ALLOCATE( transfer_index(4,0:numprocs-1) )3936 IF ( .NOT. ALLOCATED( e_end_index ) ) ALLOCATE( e_end_index(nys:nyn,nxl:nxr) ) 3937 IF ( .NOT. ALLOCATED( e_start_index ) ) ALLOCATE( e_start_index(nys:nyn,nxl:nxr) ) 3938 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr) ) 3939 IF ( .NOT. ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) ) 3940 IF ( .NOT. ALLOCATED( nr_surfaces_in_tb ) ) ALLOCATE( nr_surfaces_in_tb(0:numprocs-1) ) 3941 IF ( .NOT. ALLOCATED( s_index_in_tb ) ) ALLOCATE( s_index_in_tb(0:numprocs-1) ) 3942 IF ( .NOT. ALLOCATED( s_index_in_window ) ) ALLOCATE( s_index_in_window(nys:nyn,nxl:nxr) ) 3943 IF ( .NOT. ALLOCATED( transfer_index ) ) ALLOCATE( transfer_index(4,0:numprocs-1) ) 3945 3944 ENDIF 3946 3945 … … 3974 3973 ENDDO 3975 3974 ! 3976 !-- Distribute these values to all PEs along y.3975 !-- Distribute these elements to all PEs along y. 3977 3976 CALL MPI_ALLREDUCE( nr_surf_cells_y_s, nr_surf_cells_y, SIZE( nr_surf_cells_y ), & 3978 3977 MPI_INTEGER, MPI_SUM, comm1dy, ierr ) … … 4055 4054 #endif 4056 4055 4057 total_number_of_surface_ values = 04056 total_number_of_surface_elements = 0 4058 4057 DO i = 0, numprocs-1 4059 4058 IF ( i == myid ) THEN 4060 glo_start = total_number_of_surface_ values + 14061 ENDIF 4062 total_number_of_surface_ values = total_number_of_surface_values + nr_surfcells_all_r(i,1)4059 glo_start = total_number_of_surface_elements + 1 4060 ENDIF 4061 total_number_of_surface_elements = total_number_of_surface_elements + nr_surfcells_all_r(i,1) 4063 4062 ENDDO 4064 only_dummy_ values = ( MAXVAL( nr_surfcells_all_r(:,2) ) <= 0 )4063 only_dummy_elements = ( MAXVAL( nr_surfcells_all_r(:,2) ) <= 0 ) 4065 4064 4066 4065 ! 4067 4066 !-- Compute indices of equally distributed surface elements. 4068 !-- Number of surface values scheduled for ouput on this PE:4069 nr_surfcells_pe = total_number_of_surface_ values / numprocs4070 rest_cells_pe = MOD( total_number_of_surface_ values, numprocs )4067 !-- Number of surface elements scheduled for ouput on this PE: 4068 nr_surfcells_pe = total_number_of_surface_elements / numprocs 4069 rest_cells_pe = MOD( total_number_of_surface_elements, numprocs ) 4071 4070 rest_bound = rest_cells_pe * ( nr_surfcells_pe + 1 ) 4072 4071 m_start_index = start_index … … 4074 4073 4075 4074 ! 4076 !-- Compute number of elements on source PE, which have to be send to the corresponding target PE. 4077 #if defined( __parallel ) 4078 nr_cells_to_thread = 0 4079 nr_values_to_thread = 0 4075 !-- Compute number of surface elements on source PE, which have to be send to the corresponding 4076 !-- target PE. 4077 #if defined( __parallel ) 4078 nr_gp_with_surfaces_for_pe = 0 4079 nr_surface_elements_for_pe = 0 4080 4080 DO i = nxl, nxr 4081 4081 DO j = nys, nyn 4082 4082 IF ( rest_cells_pe == 0 ) THEN 4083 target_thread(j,i) = ( global_start(j,i) - 1 ) / nr_surfcells_pe4083 s_index_in_window(j,i) = ( global_start(j,i) - 1 ) / nr_surfcells_pe 4084 4084 ELSE 4085 4085 IF ( global_start(j,i) <= rest_bound ) THEN 4086 target_thread(j,i) = ( global_start(j,i) - 1 ) / ( nr_surfcells_pe + 1 )4086 s_index_in_window(j,i) = ( global_start(j,i) - 1 ) / ( nr_surfcells_pe + 1 ) 4087 4087 ELSE 4088 target_thread(j,i) = ( global_start(j,i) - rest_bound - 1 ) / nr_surfcells_pe4089 target_thread(j,i) = target_thread(j,i) + rest_cells_pe4088 s_index_in_window(j,i) = ( global_start(j,i) - rest_bound - 1 ) / nr_surfcells_pe 4089 s_index_in_window(j,i) = s_index_in_window(j,i) + rest_cells_pe 4090 4090 ENDIF 4091 4091 ! 4092 4092 !-- TODO: Test output, to be removed later. 4093 IF ( target_thread(j,i) >= numprocs ) THEN4094 WRITE( 9,'(A,8I8)' ) ' target_thread ', j, i, target_thread(j,i),&4093 IF ( s_index_in_window(j,i) >= numprocs ) THEN 4094 WRITE( 9,'(A,8I8)' ) 's_index_in_window ', j, i, s_index_in_window(j,i), & 4095 4095 global_start(j,i) , nr_surfcells_pe 4096 4096 FLUSH( 9 ) … … 4098 4098 ENDIF 4099 4099 ENDIF 4100 nr_cells_to_thread(target_thread(j,i)) = nr_cells_to_thread(target_thread(j,i)) + 1 4101 nr_values_to_thread(target_thread(j,i)) = nr_values_to_thread(target_thread(j,i)) + & 4102 e_end_index(j,i) - e_start_index(j,i) + 1 4100 nr_gp_with_surfaces_for_pe(s_index_in_window(j,i)) = & 4101 nr_gp_with_surfaces_for_pe(s_index_in_window(j,i)) + 1 4102 nr_surface_elements_for_pe(s_index_in_window(j,i)) = & 4103 nr_surface_elements_for_pe(s_index_in_window(j,i)) + & 4104 e_end_index(j,i) - e_start_index(j,i) + 1 4103 4105 ENDDO 4104 4106 ENDDO … … 4106 4108 ! 4107 4109 !-- Compute start index in the transfer buffer on the source side for the corresponding target PE. 4108 thread_index(0)= 14109 thread_values(0) = 14110 s_index_in_tb(0) = 1 4111 nr_surfaces_in_tb(0) = 1 4110 4112 DO n = 1, numprocs-1 4111 thread_index(n) = thread_index(n-1) + nr_cells_to_thread(n-1)4112 thread_values(n) = thread_values(n-1) + nr_values_to_thread(n-1)4113 s_index_in_tb(n) = s_index_in_tb(n-1) + nr_gp_with_surfaces_for_pe(n-1) 4114 nr_surfaces_in_tb(n) = nr_surfaces_in_tb(n-1) + nr_surface_elements_for_pe(n-1) 4113 4115 ENDDO 4114 4116 ! 4115 4117 !-- Buffer distribution on the source side. 4116 4118 DO n = 0, numprocs-1 4117 transfer_index_s(1,n) = thread_index(n)4118 transfer_index_s(2,n) = nr_ cells_to_thread(n)4119 transfer_index_s(3,n) = thread_values(n)4120 transfer_index_s(4,n) = nr_ values_to_thread(n)4119 transfer_index_s(1,n) = s_index_in_tb(n) 4120 transfer_index_s(2,n) = nr_gp_with_surfaces_for_pe(n) 4121 transfer_index_s(3,n) = nr_surfaces_in_tb(n) 4122 transfer_index_s(4,n) = nr_surface_elements_for_pe(n) 4121 4123 ENDDO 4122 4124 … … 4128 4130 ! 4129 4131 !-- Create surface element file type. 4130 IF ( total_number_of_surface_ values > 0 .AND. .NOT. only_dummy_values) THEN4132 IF ( total_number_of_surface_elements > 0 .AND. .NOT. only_dummy_elements) THEN 4131 4133 data_to_write = .TRUE. 4132 4134 ELSE … … 4144 4146 4145 4147 IF ( sm_io%iam_io_pe ) THEN 4146 IF ( total_number_of_surface_ values > 0 ) THEN4148 IF ( total_number_of_surface_elements > 0 ) THEN 4147 4149 CALL MPI_TYPE_CREATE_SUBARRAY( 1, dims1, lsize1, start1, MPI_ORDER_FORTRAN, MPI_REAL, & 4148 4150 ft_surf, ierr ) … … 4152 4154 ! 4153 4155 !-- Allocate rma window to supply surface data to other PEs. 4154 CALL rd_alloc_rma_mem( array_1d, SUM( nr_ values_to_thread), win_surf )4156 CALL rd_alloc_rma_mem( array_1d, SUM( nr_surface_elements_for_pe ), win_surf ) 4155 4157 ! 4156 4158 !-- Allocate shared array on IO-PE to supply data for MPI-IO (write or read). … … 4167 4169 ENDIF 4168 4170 #else 4169 IF ( total_number_of_surface_ values > 0 .AND. .NOT. only_dummy_values ) THEN4171 IF ( total_number_of_surface_elements > 0 .AND. .NOT. only_dummy_elements ) THEN 4170 4172 data_to_write = .TRUE. 4171 4173 ELSE 4172 4174 data_to_write = .FALSE. 4173 4175 ENDIF 4174 ALLOCATE( array_out(1:total_number_of_surface_ values) )4176 ALLOCATE( array_out(1:total_number_of_surface_elements) ) 4175 4177 #endif 4176 4178 … … 4269 4271 local_indices(2,:) = 0 4270 4272 4271 winsize = MAX( 2 * SUM( nr_ cells_to_thread), 2 )4273 winsize = MAX( 2 * SUM( nr_gp_with_surfaces_for_pe ), 2 ) 4272 4274 4273 4275 ALLOCATE( surf_val_index(2,winsize) ) … … 4276 4278 CALL MPI_WIN_FENCE( 0, win_ind, ierr ) 4277 4279 4278 lo_index = thread_index4280 lo_index = s_index_in_tb 4279 4281 DO i = nxl, nxr 4280 4282 DO j = nys, nyn 4281 surf_val_index(1,lo_index( target_thread(j,i))) = global_start(j,i)4282 surf_val_index(2,lo_index( target_thread(j,i))) = global_end(j,i) - global_start(j,i)&4283 + 14284 lo_index( target_thread(j,i)) = lo_index(target_thread(j,i)) + 14283 surf_val_index(1,lo_index(s_index_in_window(j,i))) = global_start(j,i) 4284 surf_val_index(2,lo_index(s_index_in_window(j,i))) = global_end(j,i) - & 4285 global_start(j,i) + 1 4286 lo_index(s_index_in_window(j,i)) = lo_index(s_index_in_window(j,i)) + 1 4285 4287 ENDDO 4286 4288 ENDDO
Note: See TracChangeset
for help on using the changeset viewer.