Changeset 4617 for palm/trunk/SOURCE
- Timestamp: Jul 22, 2020 9:48:50 AM
- Location: palm/trunk/SOURCE
- Files: 3 edited
palm/trunk/SOURCE/read_restart_data_mod.f90
r4590   r4617
  25     25   ! -----------------
  26     26   ! $Id$
        27   ! check, if boundary conditions in the prerun are both set to cyclic
        28   !
        29   ! 4590 2020-07-06 14:34:59Z suehring
  27     30   ! Bugfix in allocation of hom and hom_sum in case of mpi-io restart when
  28     31   ! chemistry or salsa are employed
   …      …
1176   1179
1177   1180   CHARACTER (LEN=10) :: version_on_file
       1181   CHARACTER (LEN=20) :: bc_lr_on_file
       1182   CHARACTER (LEN=20) :: bc_ns_on_file
1178   1183   CHARACTER (LEN=20) :: momentum_advec_check
1179   1184   CHARACTER (LEN=20) :: scalar_advec_check
   …      …
1308   1313   ENDIF
1309   1314
       1315   CASE ( 'bc_lr' )
       1316      READ ( 13 ) bc_lr_on_file
       1317      IF ( TRIM( bc_lr_on_file ) /= 'cyclic' ) THEN
       1318         message_string = 'bc_lr in the prerun was set /= "cyclic"'
       1319         CALL message( 'rrd_read_parts_of_global', 'PA0498', 1, 2, 0, 6, 0 )
       1320      ENDIF
       1321
       1322   CASE ( 'bc_ns' )
       1323      READ ( 13 ) bc_ns_on_file
       1324      IF ( TRIM( bc_ns_on_file ) /= 'cyclic' ) THEN
       1325         message_string = 'bc_ns in the prerun was set /= "cyclic"'
       1326         CALL message( 'rrd_read_parts_of_global', 'PA0498', 1, 2, 0, 6, 0 )
       1327      ENDIF
       1328
1310   1329   CASE ( 'hom' )
1311   1330      ALLOCATE( hom_on_file(0:nz+1,2,pr_palm+max_pr_user_on_file, &
   …      …
1445   1464   ENDIF
1446   1465
1447          CALL rrd_mpi_io( 'nx', nx_on_file )
1448          CALL rrd_mpi_io( 'ny', ny_on_file )
1449          CALL rrd_mpi_io_global_array( 'ref_state', ref_state )
       1466   CALL rrd_mpi_io( 'bc_lr', bc_lr_on_file )
       1467   CALL rrd_mpi_io( 'bc_ns', bc_ns_on_file )
       1468   IF ( TRIM( bc_lr_on_file ) /= 'cyclic' .OR. TRIM( bc_ns_on_file ) /= 'cyclic' ) THEN
       1469      message_string = 'bc_lr and/or bc_ns in the prerun was set /= "cyclic"'
       1470      CALL message( 'rrd_read_parts_of_global', 'PA0498', 1, 2, 0, 6, 0 )
       1471   ENDIF
1450   1472
1451   1473   scalar_advec_check = scalar_advec
   …      …
1457   1479      CALL message( 'rrd_read_parts_of_global', 'PA0101', 1, 2, 0, 6, 0 )
1458   1480   ENDIF
       1481
       1482   CALL rrd_mpi_io( 'nx', nx_on_file )
       1483   CALL rrd_mpi_io( 'ny', ny_on_file )
       1484   CALL rrd_mpi_io_global_array( 'ref_state', ref_state )
1459   1485
1460   1486   !
   …      …
2270   2296
2271   2297   !
2272          !-- Read global restart data using MPI-IO
       2298   !-- Read local restart data using MPI-IO
2273   2299   !
2274   2300   !-- Open the MPI-IO restart file.
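The edits to read_restart_data_mod.f90 shown above implement a single consistency check: prerun data may only be used for cyclic-fill initialization if the prerun itself used cyclic lateral boundary conditions in both directions. The strings bc_lr and bc_ns stored in the prerun restart file are therefore read back (via READ( 13 ) in the Fortran-IO branch and via rrd_mpi_io in the MPI-IO branch) and compared against 'cyclic', and error PA0498 is raised otherwise. A minimal standalone sketch of that check follows; the program name and the hard-coded example values are illustrative only and not part of the changeset, while in PALM the abort goes through message().

PROGRAM check_prerun_bc_sketch
!-- Minimal sketch of the new consistency check: both lateral boundary conditions of the
!-- prerun must be cyclic, otherwise its data cannot be repeated cyclically in the main run.
   IMPLICIT NONE

   CHARACTER(LEN=20) :: bc_lr_on_file = 'cyclic'                !< would be read from the restart file
   CHARACTER(LEN=20) :: bc_ns_on_file = 'dirichlet/radiation'   !< deliberately wrong in this sketch

   IF ( TRIM( bc_lr_on_file ) /= 'cyclic'  .OR.  TRIM( bc_ns_on_file ) /= 'cyclic' )  THEN
      WRITE( *, * )  'bc_lr and/or bc_ns in the prerun was set /= "cyclic"'
      STOP 1
   ENDIF

   WRITE( *, * )  'prerun boundary conditions are cyclic - cyclic fill is possible'

END PROGRAM check_prerun_bc_sketch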
palm/trunk/SOURCE/restart_data_mpi_io_mod.f90
r4598 r4617 25 25 ! ----------------- 26 26 ! $Id$ 27 ! Cyclic fill mode implemented 28 ! 29 ! 4598 2020-07-10 10:13:23Z suehring 27 30 ! Bugfix in treatment of 3D soil arrays 28 31 ! 29 32 ! 4591 2020-07-06 15:56:08Z raasch 30 33 ! File re-formatted to follow the PALM coding standard 31 !32 34 ! 33 35 ! 4539 2020-05-18 14:05:17Z raasch … … 101 103 nxl, & 102 104 nxlg, & 105 nx_on_file, & 103 106 nxr, & 104 107 nxrg, & … … 106 109 nyn, & 107 110 nyng, & 111 ny_on_file, & 108 112 nys, & 109 113 nysg, & … … 118 122 comm1dy, & 119 123 comm2d, & 124 communicator_configurations, & 120 125 myid, & 121 126 myidx, & … … 127 132 128 133 USE shared_memory_io_mod, & 129 ONLY: local_boundaries,&134 ONLY: domain_decomposition_grid_features, & 130 135 sm_class 131 136 … … 197 202 198 203 ! 199 !-- Handling of outer boundaries 200 TYPE(local_boundaries) :: lb !< 204 !-- Variable to store the grid features (index bounds) of the temporary arrays that are used 205 !-- to read and write the restart data. They differ depending on if the outer boundary of the 206 !-- total domain is contained in the restart data or not. iog stands for IO-grid. 207 TYPE(domain_decomposition_grid_features) :: iog !< 201 208 202 209 ! … … 237 244 CHARACTER(LEN=32), DIMENSION(max_nr_arrays) :: array_names 238 245 INTEGER(KIND=rd_offset_kind), DIMENSION(max_nr_arrays) :: array_offset 246 247 ! 248 !-- Variables to handle the cyclic fill initialization mode 249 INTEGER :: comm_cyclic_fill !< communicator for cyclic fill PEs 250 INTEGER :: rmawin_2di !< RMA window 2d INTEGER 251 INTEGER :: rmawin_2d !< RMA window 2d REAL 252 INTEGER :: rmawin_3d !< RMA window 3d 253 254 INTEGER(iwp), ALLOCATABLE, DIMENSION(:,:) :: remote_pe 255 INTEGER(iwp), ALLOCATABLE, DIMENSION(:,:) :: remote_pe_s 256 INTEGER(iwp), ALLOCATABLE, DIMENSION(:,:) :: rma_offset 257 INTEGER(iwp), ALLOCATABLE, DIMENSION(:,:) :: rma_offset_s 258 INTEGER(iwp), ALLOCATABLE, DIMENSION(:,:) :: rmabuf_2di 259 260 LOGICAL :: cyclic_fill_mode !< arrays are filled cyclically with data from prerun 261 LOGICAL :: pe_active_for_read = .TRUE. !< this PE is active for reading data from prerun or 262 !< restart run. For restarts all PEs are active. 263 264 REAL(wp), ALLOCATABLE, DIMENSION(:,:) :: rmabuf_2d 265 REAL(wp), ALLOCATABLE, DIMENSION(:,:,:) :: rmabuf_3d 266 267 TYPE(domain_decomposition_grid_features) :: mainrun_grid !< grid variables for the main run 268 TYPE(domain_decomposition_grid_features) :: prerun_grid !< grid variables for the prerun 269 270 239 271 SAVE 240 272 … … 352 384 #endif 353 385 386 ! write(9,*) 'Here is rd_mpi_io_open',nx,nx_on_file,ny,ny_on_file,TRIM(action) !kk may become Debug Output 354 387 355 388 offset = 0 … … 387 420 ENDIF 388 421 389 CALL sm_io%sm_init_comm( io_on_limited_cores_per_node ) 422 ! 423 !-- Determine, if prerun data shall be read and mapped cyclically to the mainrun arrays. 424 !-- In cyclic fill mode only a subset of the PEs will read. 425 cyclic_fill_mode = .FALSE. 426 pe_active_for_read = .TRUE. 427 428 IF ( rd_flag .AND. .NOT. PRESENT( open_for_global_io_only ) .AND. & 429 nx_on_file < nx .AND. ny_on_file < ny ) THEN 430 cyclic_fill_mode = .TRUE. 431 CALL setup_cyclic_fill 432 ! 433 !-- Shared memory IO on limited cores is not allowed for cyclic fill mode 434 CALL sm_io%sm_init_comm( .FALSE. ) ! 435 ELSE 436 CALL sm_io%sm_init_comm( io_on_limited_cores_per_node ) 437 ENDIF 438 439 ! 440 !-- TODO: add a more detailed meaningful comment about what is happening here 441 !-- activate model grid 442 IF( cyclic_fill_mode .AND. .NOT. 
pe_active_for_read ) THEN 443 CALL mainrun_grid%activate_grid_from_this_class() 444 RETURN 445 ENDIF 446 390 447 391 448 ! … … 393 450 IF( sm_io%is_sm_active() ) THEN 394 451 comm_io = sm_io%comm_io 452 ELSEIF ( cyclic_fill_mode ) THEN 453 comm_io = comm_cyclic_fill 395 454 ELSE 396 455 comm_io = comm2d … … 671 730 #endif 672 731 673 ENDIF 732 733 ENDIF 734 735 ! 736 !-- TODO: describe in more detail what is happening here 737 !-- activate model grid 738 IF ( cyclic_fill_mode ) CALL mainrun_grid%activate_grid_from_this_class() 739 740 CONTAINS 741 742 SUBROUTINE setup_cyclic_fill 743 744 IMPLICIT NONE 745 746 INTEGER :: color !< used to set the IO PEs for MPI_COMM_SPLIT 747 INTEGER :: ierr !< 748 INTEGER(iwp) :: i !< 749 INTEGER(iwp) :: j !< 750 INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize !< size of RMA window 751 752 ! 753 !-- TODO: describe in more detail what is done here and why it is done 754 !-- save grid of main run 755 CALL mainrun_grid%save_grid_into_this_class() 756 757 ALLOCATE( remote_pe(0:nx_on_file,0:ny_on_file) ) 758 ALLOCATE( remote_pe_s(0:nx_on_file,0:ny_on_file) ) 759 ALLOCATE( rma_offset(0:nx_on_file,0:ny_on_file) ) 760 ALLOCATE( rma_offset_s(0:nx_on_file,0:ny_on_file) ) 761 762 remote_pe_s = 0 763 rma_offset_s = 0 764 ! 765 !-- Determine, if gridpoints of the prerun are located on this thread. 766 !-- Set the (cyclic) prerun grid. 767 nxr = MIN( nxr, nx_on_file ) 768 IF ( nxl > nx_on_file ) THEN 769 nxl = -99 770 nxr = -99 771 nnx = 0 772 ELSE 773 nnx =nxr-nxl+1 774 ENDIF 775 776 nyn = MIN( nyn, ny_on_file ) 777 IF ( nys > ny_on_file ) THEN 778 nys = -99 779 nyn = -99 780 nny = 0 781 ELSE 782 nny = nyn-nys+1 783 ENDIF 784 785 nx = nx_on_file 786 ny = ny_on_file 787 ! 788 !-- Determine, if this thread is doing IO 789 IF ( nnx > 0 .AND. nny > 0 ) THEN 790 color = 1 791 pe_active_for_read = .TRUE. 792 remote_pe_s(nxl:nxr,nys:nyn) = myid ! myid from comm2d 793 DO j = nys, nyn 794 DO i = nxl, nxr 795 rma_offset_s(i,j) = ( j-nys ) + ( i-nxl ) * nny 796 ENDDO 797 ENDDO 798 ELSE 799 color = MPI_UNDEFINED 800 pe_active_for_read = .FALSE. 801 ENDIF 802 803 #if defined( __parallel ) 804 CALL MPI_ALLREDUCE( remote_pe_s, remote_pe, SIZE(remote_pe_s), MPI_INTEGER, MPI_SUM, & 805 comm2d, ierr ) 806 CALL MPI_ALLREDUCE( rma_offset_s, rma_offset, SIZE(rma_offset_s), MPI_INTEGER, MPI_SUM, & 807 comm2d, ierr ) 808 CALL MPI_COMM_SPLIT( comm2d, color, 0, comm_cyclic_fill, ierr ) 809 810 IF ( pe_active_for_read ) THEN 811 CALL MPI_COMM_SIZE( comm_cyclic_fill, numprocs, ierr ) 812 CALL MPI_COMM_RANK( comm_cyclic_fill, myid, ierr ) 813 ENDIF 814 #else 815 remote_pe = remote_pe_s 816 rma_offset = rma_offset_s 817 myid = 0 818 numprocs = 1 819 #endif 820 ! 821 !-- Allocate 2d buffers as RMA window, accessible on all threads 822 IF ( pe_active_for_read ) THEN 823 ALLOCATE( rmabuf_2di(nys:nyn,nxl:nxr) ) 824 ELSE 825 ALLOCATE( rmabuf_2di(1,1) ) 826 ENDIF 827 winsize = SIZE( rmabuf_2di ) * iwp 828 829 #if defined( __parallel ) 830 CALL MPI_WIN_CREATE( rmabuf_2di, winsize, iwp, MPI_INFO_NULL, comm2d, rmawin_2di, ierr ) 831 CALL MPI_WIN_FENCE( 0, rmawin_2di, ierr ) 832 #endif 833 834 IF ( pe_active_for_read ) THEN 835 ALLOCATE( rmabuf_2d(nys:nyn,nxl:nxr) ) 836 ELSE 837 ALLOCATE( rmabuf_2d(1,1) ) 838 ENDIF 839 winsize = SIZE( rmabuf_2d ) * wp 840 841 #if defined( __parallel ) 842 CALL MPI_WIN_CREATE( rmabuf_2d, winsize, wp, MPI_INFO_NULL, comm2d, rmawin_2d, ierr ) 843 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 844 #endif 845 846 ! 
847 !-- Allocate 3d buffer as RMA window, accessable on all threads 848 IF ( pe_active_for_read ) THEN 849 ALLOCATE( rmabuf_3d(nzb:nzt+1,nys:nyn,nxl:nxr) ) 850 ELSE 851 ALLOCATE( rmabuf_3d(1,1,1) ) 852 ENDIF 853 winsize = SIZE( rmabuf_3d ) * wp 854 855 #if defined( __parallel ) 856 CALL MPI_WIN_CREATE( rmabuf_3d, winsize, wp, MPI_INFO_NULL, comm2d, rmawin_3d, ierr ) 857 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 858 #endif 859 860 ! 861 !-- TODO: comment in more detail, what is done here, and why 862 !-- save small grid 863 CALL prerun_grid%save_grid_into_this_class() 864 prerun_grid%comm2d = comm_cyclic_fill 865 866 DEALLOCATE( remote_pe_s, rma_offset_s ) 867 868 END SUBROUTINE setup_cyclic_fill 674 869 675 870 END SUBROUTINE rd_mpi_io_open … … 813 1008 814 1009 IF ( found ) THEN 815 #if defined( __parallel ) 816 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 817 IF ( sm_io%iam_io_pe ) THEN 1010 1011 IF ( cyclic_fill_mode ) THEN 1012 1013 CALL rrd_mpi_io_real_2d_cyclic_fill 1014 1015 ELSE 1016 1017 #if defined( __parallel ) 1018 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited # of cores is inactive 1019 IF ( sm_io%iam_io_pe ) THEN 1020 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_2d, 'native', MPI_INFO_NULL, & 1021 ierr ) 1022 CALL MPI_FILE_READ_ALL( fh, array_2d, SIZE( array_2d ), MPI_REAL, status, ierr ) 1023 ENDIF 1024 CALL sm_io%sm_node_barrier() 1025 #else 1026 CALL posix_lseek( fh, array_position ) 1027 CALL posix_read( fh, array_2d, SIZE( array_2d ) ) 1028 #endif 1029 1030 IF ( include_total_domain_boundaries ) THEN 1031 DO i = iog%nxl, iog%nxr 1032 data(iog%nys-nbgp:iog%nyn-nbgp,i-nbgp) = array_2d(i,iog%nys:iog%nyn) 1033 ENDDO 1034 IF ( debug_level >= 2) THEN 1035 WRITE(9,*) 'r2f_ob ', TRIM(name),' ', SUM( data(nys:nyn,nxl:nxr) ) 1036 ENDIF 1037 ELSE 1038 DO i = nxl, nxr 1039 data(nys:nyn,i) = array_2d(i,nys:nyn) 1040 ENDDO 1041 IF ( debug_level >= 2) THEN 1042 WRITE(9,*) 'r2f ', TRIM( name ),' ', SUM( data(nys:nyn,nxl:nxr) ) 1043 ENDIF 1044 ENDIF 1045 1046 ENDIF 1047 1048 CALL exchange_horiz_2d( data ) 1049 1050 ELSE 1051 message_string = '2d-REAL array "' // TRIM( name ) // '" not found in restart file' 1052 CALL message( 'rrd_mpi_io_int', 'PA0722', 3, 2, 0, 6, 0 ) 1053 ENDIF 1054 1055 1056 CONTAINS 1057 1058 SUBROUTINE rrd_mpi_io_real_2d_cyclic_fill 1059 1060 IMPLICIT NONE 1061 1062 INTEGER(iwp) :: i !< 1063 INTEGER(iwp) :: ie !< 1064 INTEGER(iwp) :: ierr !< 1065 INTEGER(iwp) :: is !< 1066 INTEGER(iwp) :: i_remote !< 1067 INTEGER(iwp) :: j !< 1068 INTEGER(iwp) :: je !< 1069 INTEGER(iwp) :: js !< 1070 INTEGER(iwp) :: j_remote !< 1071 INTEGER(iwp) :: nval !< 1072 INTEGER(iwp) :: rem_pe !< 1073 1074 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_offs !< 1075 1076 1077 !kk write(9,*) 'Here is rma_cylic_fill_real_2d ',nxl,nxr,nys,nyn; FLUSH(9) 1078 1079 ! 1080 !-- Reading 2d real array on prerun grid 1081 CALL prerun_grid%activate_grid_from_this_class() 1082 1083 IF ( pe_active_for_read ) THEN 1084 818 1085 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_2d, 'native', MPI_INFO_NULL, & 819 1086 ierr ) 820 1087 CALL MPI_FILE_READ_ALL( fh, array_2d, SIZE( array_2d ), MPI_REAL, status, ierr ) 821 ENDIF 822 CALL sm_io%sm_node_barrier() 1088 1089 DO i = nxl, nxr 1090 rmabuf_2d(nys:nyn,i) = array_2d(i,nys:nyn) 1091 ENDDO 1092 data(nys:nyn,nxl:nxr) = rmabuf_2d ! copy prerund data directly into output array data 1093 ENDIF 1094 1095 CALL mainrun_grid%activate_grid_from_this_class() 1096 1097 #if defined( __parallel ) 1098 ! 
1099 !-- Close RMA window to allow remote access 1100 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 1101 #endif 1102 1103 ! 1104 !-- TODO: describe in more detail what is happening in this IF/ELSE clause 1105 IF ( .NOT. pe_active_for_read ) THEN 1106 1107 is = nxl 1108 ie = nxr 1109 js = nys 1110 je = nyn 1111 1112 ELSE 1113 ! 1114 !-- Extra get for cyclic data north of prerun data 1115 is = nxl 1116 ie = nxr 1117 js = prerun_grid%nys+1 1118 je = nyn 1119 DO i = is, ie 1120 DO j = js, je 1121 i_remote = MOD(i,nx_on_file+1) 1122 j_remote = MOD(j,ny_on_file+1) 1123 rem_pe = remote_pe(i_remote,j_remote) 1124 rem_offs = rma_offset(i_remote,j_remote) 1125 nval = 1 1126 1127 #if defined( __parallel ) 1128 IF ( rem_pe /= myid ) THEN 1129 CALL MPI_GET( data(j,i), nval, MPI_REAL, rem_pe, rem_offs, nval, MPI_REAL, & 1130 rmawin_2d, ierr ) 1131 ELSE 1132 data(j,i) = rmabuf_2d(j_remote,i_remote) 1133 ENDIF 823 1134 #else 824 CALL posix_lseek( fh, array_position ) 825 CALL posix_read( fh, array_2d, SIZE( array_2d ) ) 826 #endif 827 828 IF ( include_total_domain_boundaries ) THEN 829 DO i = lb%nxl, lb%nxr 830 data(lb%nys-nbgp:lb%nyn-nbgp,i-nbgp) = array_2d(i,lb%nys:lb%nyn) 1135 data(j,i) = array_2d(i_remote,j_remote) 1136 #endif 1137 ENDDO 831 1138 ENDDO 832 IF ( debug_level >= 2) WRITE(9,*) 'r2f_ob ', TRIM(name),' ', SUM( data(nys:nyn,nxl:nxr) ) 833 ELSE 834 DO i = nxl, nxr 835 data(nys:nyn,i) = array_2d(i,nys:nyn) 1139 ! 1140 !-- Prepare setup for stripe right of prerun data 1141 is = prerun_grid%nxr+1 1142 ie = nxr 1143 js = nys 1144 je = nyn 1145 1146 ENDIF 1147 1148 DO i = is, ie 1149 DO j = js, je 1150 i_remote = MOD(i,nx_on_file+1) 1151 j_remote = MOD(j,ny_on_file+1) 1152 rem_pe = remote_pe(i_remote,j_remote) 1153 rem_offs = rma_offset(i_remote,j_remote) 1154 nval = 1 1155 1156 #if defined( __parallel ) 1157 IF ( rem_pe /= myid ) THEN 1158 CALL MPI_GET( data(j,i), nval, MPI_REAL, rem_pe, rem_offs, nval, MPI_REAL, & 1159 rmawin_2d, ierr ) 1160 ELSE 1161 data(j,i) = rmabuf_2d(j_remote,i_remote) 1162 ENDIF 1163 #else 1164 data(j,i) = array_2d(i_remote,j_remote) 1165 #endif 836 1166 ENDDO 837 IF ( debug_level >= 2) WRITE(9,*) 'r2f ', TRIM( name ),' ', SUM( data(nys:nyn,nxl:nxr) )838 ENDIF 839 840 CALL exchange_horiz_2d( data ) 841 842 ELSE843 message_string = '2d-REAL array "' // TRIM( name ) // '" not found in restart file' 844 CALL message( 'rrd_mpi_io_int', 'PA0722', 3, 2, 0, 6, 0 ) 845 END IF1167 ENDDO 1168 1169 #if defined( __parallel ) 1170 ! 1171 !-- Reopen RMA window to allow filling 1172 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 1173 #endif 1174 1175 END SUBROUTINE rrd_mpi_io_real_2d_cyclic_fill 846 1176 847 1177 END SUBROUTINE rrd_mpi_io_real_2d … … 899 1229 !-- This kind of array is dimensioned in the caller subroutine 900 1230 !-- INTEGER, DIMENSION(nys:nyn,nxl:nxr) :: data 901 902 #if defined( __parallel ) 903 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 904 IF ( sm_io%iam_io_pe ) THEN 905 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_INTEGER, ft_2di_nb, 'native', & 906 MPI_INFO_NULL, ierr ) 907 CALL MPI_FILE_READ_ALL( fh, array_2di, SIZE( array_2di ), MPI_INTEGER, status, ierr ) 1231 IF ( cyclic_fill_mode ) THEN 1232 1233 CALL rrd_mpi_io_int_2d_cyclic_fill 1234 1235 ELSE 1236 1237 #if defined( __parallel ) 1238 CALL sm_io%sm_node_barrier() ! 
Has no effect if I/O on limited # of cores is inactive 1239 IF ( sm_io%iam_io_pe ) THEN 1240 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_INTEGER, ft_2di_nb, 'native', & 1241 MPI_INFO_NULL, ierr ) 1242 CALL MPI_FILE_READ_ALL( fh, array_2di, SIZE( array_2di ), MPI_INTEGER, status, & 1243 ierr ) 1244 ENDIF 1245 CALL sm_io%sm_node_barrier() 1246 #else 1247 CALL posix_lseek( fh, array_position ) 1248 CALL posix_read( fh, array_2di, SIZE( array_2di ) ) 1249 #endif 1250 DO j = nys, nyn 1251 DO i = nxl, nxr 1252 data(j-nys+1,i-nxl+1) = array_2di(i,j) 1253 ENDDO 1254 ENDDO 1255 908 1256 ENDIF 909 CALL sm_io%sm_node_barrier()910 #else911 CALL posix_lseek( fh, array_position )912 CALL posix_read( fh, array_2di, SIZE( array_2di ) )913 #endif914 915 DO j = nys, nyn916 DO i = nxl, nxr917 data(j-nys+1,i-nxl+1) = array_2di(i,j)918 ENDDO919 ENDDO920 1257 921 1258 ELSE … … 934 1271 ENDIF 935 1272 1273 1274 CONTAINS 1275 1276 SUBROUTINE rrd_mpi_io_int_2d_cyclic_fill 1277 1278 IMPLICIT NONE 1279 1280 INTEGER(iwp) :: i !< 1281 INTEGER(iwp) :: ie !< 1282 INTEGER(iwp) :: ierr !< 1283 INTEGER(iwp) :: is !< 1284 INTEGER(iwp) :: i_remote !< 1285 INTEGER(iwp) :: j !< 1286 INTEGER(iwp) :: je !< 1287 INTEGER(iwp) :: js !< 1288 INTEGER(iwp) :: j_remote !< 1289 INTEGER(iwp) :: nval !< 1290 INTEGER(iwp) :: rem_pe !< 1291 1292 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_offs !< 1293 1294 1295 CALL prerun_grid%activate_grid_from_this_class() 1296 1297 IF ( pe_active_for_read ) THEN 1298 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_INTEGER, ft_2di_nb, 'native', & 1299 MPI_INFO_NULL, ierr ) 1300 CALL MPI_FILE_READ_ALL( fh, array_2di, SIZE( array_2di ), MPI_INTEGER, status, ierr ) 1301 1302 DO i = nxl, nxr 1303 rmabuf_2di(nys:nyn,i) = array_2di(i,nys:nyn) 1304 ENDDO 1305 data(1:nny,1:nnx) = rmabuf_2di 1306 ENDIF 1307 1308 CALL mainrun_grid%activate_grid_from_this_class() 1309 1310 #if defined( __parallel ) 1311 ! 1312 !-- Close RMA window to allow remote access 1313 CALL MPI_WIN_FENCE( 0, rmawin_2di, ierr ) 1314 #endif 1315 1316 IF ( .NOT. pe_active_for_read ) THEN 1317 1318 is = nxl 1319 ie = nxr 1320 js = nys 1321 je = nyn 1322 1323 ELSE 1324 1325 is = nxl 1326 ie = nxr 1327 js = prerun_grid%nys+1 1328 je = nyn 1329 DO i = is, ie 1330 DO j = js, je 1331 i_remote = MOD(i,nx_on_file+1) 1332 j_remote = MOD(j,ny_on_file+1) 1333 rem_pe = remote_pe(i_remote,j_remote) 1334 rem_offs = rma_offset(i_remote,j_remote) 1335 nval = 1 1336 1337 #if defined( __parallel ) 1338 IF ( rem_pe /= myid ) THEN 1339 CALL MPI_GET( data(j-nys+1,i-nxl+1), nval, MPI_INTEGER, rem_pe, rem_offs, nval, & 1340 MPI_INTEGER, rmawin_2di, ierr ) 1341 ELSE 1342 data(j-nys+1,i-nxl+1) = rmabuf_2di(j_remote,i_remote) 1343 ENDIF 1344 #else 1345 data(j-nys+1,i-nxl+1) = array_2di(i_remote,j_remote) 1346 #endif 1347 ENDDO 1348 ENDDO 1349 is = prerun_grid%nxr+1 1350 ie = nxr 1351 js = nys 1352 je = nyn 1353 1354 ENDIF 1355 1356 DO i = is, ie 1357 DO j = js, je 1358 i_remote = MOD(i,nx_on_file+1) 1359 j_remote = MOD(j,ny_on_file+1) 1360 rem_pe = remote_pe(i_remote,j_remote) 1361 rem_offs = rma_offset(i_remote,j_remote) 1362 nval = 1 1363 #if defined( __parallel ) 1364 IF ( rem_pe /= myid ) THEN 1365 CALL MPI_GET( data(j-nys+1,i-nxl+1), nval, MPI_INTEGER, rem_pe, rem_offs, nval, & 1366 MPI_INTEGER, rmawin_2di, ierr) 1367 ELSE 1368 data(j-nys+1,i-nxl+1) = rmabuf_2di(j_remote,i_remote) 1369 ENDIF 1370 #else 1371 data(j-nys+1,i-nxl+1) = array_2di(i_remote,j_remote) 1372 #endif 1373 ENDDO 1374 ENDDO 1375 1376 #if defined( __parallel ) 1377 ! 
1378 !-- Reopen RMA window to allow filling 1379 CALL MPI_WIN_FENCE( 0, rmawin_2di, ierr ) 1380 #endif 1381 1382 END SUBROUTINE rrd_mpi_io_int_2d_cyclic_fill 1383 936 1384 END SUBROUTINE rrd_mpi_io_int_2d 937 1385 … … 950 1398 951 1399 INTEGER(iwp) :: i !< 1400 INTEGER(iwp) :: j !< 952 1401 953 1402 #if defined( __parallel ) … … 961 1410 962 1411 found = .FALSE. 1412 data = -1.0 963 1413 964 1414 DO i = 1, tgh%nr_arrays … … 971 1421 972 1422 IF ( found ) THEN 973 #if defined( __parallel ) 974 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited number of cores is inactive 975 IF( sm_io%iam_io_pe ) THEN 1423 1424 IF ( cyclic_fill_mode ) THEN 1425 1426 CALL rrd_mpi_io_real_3d_cyclic_fill 1427 ! 1428 !-- Cyclic fill mode requires to use the "cyclic" communicator, in order to initialize 1429 !-- grid points at the outer boundaries (ghost layers) of the total domain. These points 1430 !-- are not contained in the prerun data, because the prerun used cyclic boundary conditions. 1431 CALL exchange_horiz( data, nbgp, alternative_communicator = 1 ) 1432 1433 ELSE 1434 #if defined( __parallel ) 1435 CALL sm_io%sm_node_barrier() ! Has no effect if I/O on limited # of cores is inactive 1436 IF( sm_io%iam_io_pe ) THEN 1437 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_3d, 'native', MPI_INFO_NULL, & 1438 ierr ) 1439 CALL MPI_FILE_READ_ALL( fh, array_3d, SIZE( array_3d ), MPI_REAL, status, ierr ) 1440 ENDIF 1441 CALL sm_io%sm_node_barrier() 1442 #else 1443 CALL posix_lseek( fh, array_position ) 1444 CALL posix_read(fh, array_3d, SIZE( array_3d ) ) 1445 #endif 1446 IF ( include_total_domain_boundaries ) THEN 1447 DO i = iog%nxl, iog%nxr 1448 data(:,iog%nys-nbgp:iog%nyn-nbgp,i-nbgp) = array_3d(:,i,iog%nys:iog%nyn) 1449 ENDDO 1450 ELSE 1451 DO i = nxl, nxr 1452 data(:,nys:nyn,i) = array_3d(:,i,nys:nyn) 1453 ENDDO 1454 ENDIF 1455 1456 CALL exchange_horiz( data, nbgp ) 1457 1458 ENDIF 1459 1460 ELSE 1461 1462 message_string = '3d-REAL array "' // TRIM( name ) // '" not found in restart file' 1463 CALL message( 'rrd_mpi_io_real_3d', 'PA0722', 3, 2, 0, 6, 0 ) 1464 1465 ENDIF 1466 1467 1468 CONTAINS 1469 1470 SUBROUTINE rrd_mpi_io_real_3d_cyclic_fill 1471 1472 IMPLICIT NONE 1473 1474 INTEGER(iwp) :: i !< 1475 INTEGER(iwp) :: ie !< 1476 INTEGER(iwp) :: ierr !< 1477 INTEGER(iwp) :: is !< 1478 INTEGER(iwp) :: i_remote !< 1479 INTEGER(iwp) :: j !< 1480 INTEGER(iwp) :: je !< 1481 INTEGER(iwp) :: js !< 1482 INTEGER(iwp) :: j_remote !< 1483 INTEGER(iwp) :: nval !< 1484 INTEGER(iwp) :: rem_pe !< 1485 1486 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_offs !< 1487 1488 1489 CALL prerun_grid%activate_grid_from_this_class() 1490 1491 IF ( pe_active_for_read ) THEN 976 1492 CALL MPI_FILE_SET_VIEW( fh, array_position, MPI_REAL, ft_3d, 'native', MPI_INFO_NULL, & 977 1493 ierr ) 978 1494 CALL MPI_FILE_READ_ALL( fh, array_3d, SIZE( array_3d ), MPI_REAL, status, ierr ) 979 ENDIF 980 CALL sm_io%sm_node_barrier() 1495 1496 DO i = nxl, nxr 1497 rmabuf_3d(:,nys:nyn,i) = array_3d(:,i,nys:nyn) 1498 ENDDO 1499 data(:,nys:nyn,nxl:nxr) = rmabuf_3d 1500 ENDIF 1501 CALL mainrun_grid%activate_grid_from_this_class () 1502 1503 #if defined( __parallel ) 1504 ! 1505 !-- Close RMA window to allow remote access 1506 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 1507 #endif 1508 1509 IF ( .NOT. 
pe_active_for_read ) THEN 1510 1511 is = nxl 1512 ie = nxr 1513 js = nys 1514 je = nyn 1515 1516 ELSE 1517 1518 is = nxl 1519 ie = nxr 1520 js = prerun_grid%nys+1 1521 je = nyn 1522 1523 DO i = is, ie 1524 DO j = js, je 1525 i_remote = MOD(i,nx_on_file+1) 1526 j_remote = MOD(j,ny_on_file+1) 1527 rem_pe = remote_pe(i_remote,j_remote) 1528 rem_offs = rma_offset(i_remote,j_remote)*(nzt-nzb+2) 1529 nval = nzt-nzb+2 1530 1531 #if defined( __parallel ) 1532 IF(rem_pe /= myid) THEN 1533 CALL MPI_GET( data(nzb,j,i), nval, MPI_REAL, rem_pe, rem_offs, nval, MPI_REAL, & 1534 rmawin_3d, ierr) 1535 ELSE 1536 data(:,j,i) = rmabuf_3d(:,j_remote,i_remote) 1537 ENDIF 981 1538 #else 982 CALL posix_lseek( fh, array_position ) 983 CALL posix_read(fh, array_3d, SIZE( array_3d ) ) 984 #endif 985 IF ( include_total_domain_boundaries ) THEN 986 DO i = lb%nxl, lb%nxr 987 data(:,lb%nys-nbgp:lb%nyn-nbgp,i-nbgp) = array_3d(:,i,lb%nys:lb%nyn) 1539 data(:,j,i) = array_3d(:,i_remote,j_remote) 1540 #endif 1541 ENDDO 988 1542 ENDDO 989 ELSE 990 DO i = nxl, nxr 991 data(:,nys:nyn,i) = array_3d(:,i,nys:nyn) 1543 is = prerun_grid%nxr+1 1544 ie = nxr 1545 js = nys 1546 je = nyn 1547 1548 ENDIF 1549 1550 DO i = is, ie 1551 DO j = js, je 1552 i_remote = MOD(i,nx_on_file+1) 1553 j_remote = MOD(j,ny_on_file+1) 1554 rem_pe = remote_pe(i_remote,j_remote) 1555 rem_offs = rma_offset(i_remote,j_remote) * ( nzt-nzb+2 ) 1556 nval = nzt-nzb+2 1557 1558 #if defined( __parallel ) 1559 IF ( rem_pe /= myid ) THEN 1560 CALL MPI_GET( data(nzb,j,i), nval, MPI_REAL, rem_pe, rem_offs, nval, MPI_REAL, & 1561 rmawin_3d, ierr) 1562 ELSE 1563 data(:,j,i) = rmabuf_3d(:,j_remote,i_remote) 1564 ENDIF 1565 #else 1566 data(:,j,i) = array_3d(:,i_remote,j_remote) 1567 #endif 992 1568 ENDDO 993 ENDIF 994 995 CALL exchange_horiz( data, nbgp ) 996 997 ELSE 998 999 message_string = '3d-REAL array "' // TRIM( name ) // '" not found in restart file' 1000 CALL message( 'rrd_mpi_io_real_3d', 'PA0722', 3, 2, 0, 6, 0 ) 1001 1002 ENDIF 1569 ENDDO 1570 1571 #if defined( __parallel ) 1572 ! 1573 !-- Reopen RMA window to allow filling 1574 CALL MPI_WIN_FENCE( 0, rmawin_3d, ierr ) 1575 #endif 1576 1577 END SUBROUTINE rrd_mpi_io_real_3d_cyclic_fill 1003 1578 1004 1579 END SUBROUTINE rrd_mpi_io_real_3d … … 1028 1603 1029 1604 LOGICAL :: found !< 1605 INTEGER(iwp) :: ierr !< 1030 1606 1031 1607 REAL(wp), INTENT(INOUT), DIMENSION(nzb_soil:nzt_soil,nysg:nyng,nxlg:nxrg) :: data !< 1032 1608 1609 1610 ! 1611 !-- Prerun data is not allowed to contain soil information so far 1612 IF ( cyclic_fill_mode ) THEN 1613 message_string = 'prerun data is not allowed to contain soil information' 1614 CALL message( 'rrd_mpi_io_real_3d_soil', 'PA0729', 3, 2, -1, 6, 0 ) 1615 ENDIF 1033 1616 1034 1617 found = .FALSE. … … 1058 1641 #endif 1059 1642 IF ( include_total_domain_boundaries ) THEN 1060 DO i = lb%nxl, lb%nxr1061 data(:, lb%nys-nbgp:lb%nyn-nbgp,i-nbgp) = array_3d_soil(:,i,lb%nys:lb%nyn)1643 DO i = iog%nxl, iog%nxr 1644 data(:,iog%nys-nbgp:iog%nyn-nbgp,i-nbgp) = array_3d_soil(:,i,iog%nys:iog%nyn) 1062 1645 ENDDO 1063 1646 ELSE … … 1222 1805 ! 
1223 1806 !-- Prepare output with outer boundaries 1224 DO i = lb%nxl, lb%nxr1225 array_2d(i, lb%nys:lb%nyn) = data(lb%nys-nbgp:lb%nyn-nbgp,i-nbgp)1807 DO i = iog%nxl, iog%nxr 1808 array_2d(i,iog%nys:iog%nyn) = data(iog%nys-nbgp:iog%nyn-nbgp,i-nbgp) 1226 1809 ENDDO 1227 1810 … … 1230 1813 !-- Prepare output without outer boundaries 1231 1814 DO i = nxl,nxr 1232 array_2d(i, lb%nys:lb%nyn) = data(nys:nyn,i)1815 array_2d(i,iog%nys:iog%nyn) = data(nys:nyn,i) 1233 1816 ENDDO 1234 1817 … … 1249 1832 !-- Type conversion required, otherwise right hand side brackets are calculated assuming 4 byte INT. 1250 1833 !-- Maybe a compiler problem. 1251 array_position = array_position + ( INT( lb%ny, KIND=rd_offset_kind ) + 1 ) *&1252 ( INT( lb%nx, KIND=rd_offset_kind ) + 1 ) * wp1834 array_position = array_position + ( INT( iog%ny, KIND=rd_offset_kind ) + 1 ) * & 1835 ( INT( iog%nx, KIND=rd_offset_kind ) + 1 ) * wp 1253 1836 1254 1837 END SUBROUTINE wrd_mpi_io_real_2d … … 1365 1948 !-- index order of the array in the same way, i.e. the first dimension should be along x and the 1366 1949 !-- second along y. For this reason, the original PALM data need to be swaped. 1367 DO i = lb%nxl, lb%nxr1368 array_3d(:,i, lb%nys:lb%nyn) = data(:,lb%nys-nbgp:lb%nyn-nbgp,i-nbgp)1950 DO i = iog%nxl, iog%nxr 1951 array_3d(:,i,iog%nys:iog%nyn) = data(:,iog%nys-nbgp:iog%nyn-nbgp,i-nbgp) 1369 1952 ENDDO 1370 1953 … … 1373 1956 !-- Prepare output of 3d-REAL-array without ghost layers 1374 1957 DO i = nxl, nxr 1375 array_3d(:,i, lb%nys:lb%nyn) = data(:,nys:nyn,i)1958 array_3d(:,i,iog%nys:iog%nyn) = data(:,nys:nyn,i) 1376 1959 ENDDO 1377 1960 … … 1391 1974 !-- Type conversion required, otherwise right hand side brackets are calculated assuming 4 byte INT. 1392 1975 !-- Maybe a compiler problem. 1393 array_position = array_position + INT( (nz+2), KIND = rd_offset_kind ) *&1394 INT( ( lb%ny+1), KIND = rd_offset_kind ) *&1395 INT( ( lb%nx+1), KIND = rd_offset_kind ) * wp1976 array_position = array_position + INT( (nz+2), KIND = rd_offset_kind ) * & 1977 INT( (iog%ny+1), KIND = rd_offset_kind ) * & 1978 INT( (iog%nx+1), KIND = rd_offset_kind ) * wp 1396 1979 1397 1980 END SUBROUTINE wrd_mpi_io_real_3d … … 1435 2018 #endif 1436 2019 1437 IF ( include_total_domain_boundaries 2020 IF ( include_total_domain_boundaries) THEN 1438 2021 ! 1439 2022 !-- Prepare output of 3d-REAL-array with ghost layers. In the virtual PE grid, the first … … 1441 2024 !-- index order of the array in the same way, i.e. the first dimension should be along x and the 1442 2025 !-- second along y. For this reason, the original PALM data need to be swaped. 1443 DO i = lb%nxl, lb%nxr1444 array_3d_soil(:,i, lb%nys:lb%nyn) = data(:,lb%nys-nbgp:lb%nyn-nbgp,i-nbgp)2026 DO i = iog%nxl, iog%nxr 2027 array_3d_soil(:,i,iog%nys:iog%nyn) = data(:,iog%nys-nbgp:iog%nyn-nbgp,i-nbgp) 1445 2028 ENDDO 1446 2029 … … 1449 2032 !-- Prepare output of 3d-REAL-array without ghost layers 1450 2033 DO i = nxl, nxr 1451 array_3d_soil(:,i, lb%nys:lb%nyn) = data(:,nys:nyn,i)2034 array_3d_soil(:,i,iog%nys:iog%nyn) = data(:,nys:nyn,i) 1452 2035 ENDDO 1453 2036 … … 1469 2052 !-- Maybe a compiler problem. 
1470 2053 array_position = array_position + INT( (nzt_soil-nzb_soil+1), KIND = rd_offset_kind ) * & 1471 INT( ( lb%ny+1),KIND = rd_offset_kind ) * &1472 INT( ( lb%nx+1),KIND = rd_offset_kind ) * wp2054 INT( (iog%ny+1), KIND = rd_offset_kind ) * & 2055 INT( (iog%nx+1), KIND = rd_offset_kind ) * wp 1473 2056 1474 2057 END SUBROUTINE wrd_mpi_io_real_3d_soil … … 1566 2149 ENDDO 1567 2150 2151 1568 2152 IF ( found ) THEN 2153 1569 2154 ! 1570 2155 !-- Set default view 1571 2156 #if defined( __parallel ) 1572 IF ( sm_io%iam_io_pe ) THEN 1573 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 1574 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 1575 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data ), MPI_REAL, status, ierr ) 1576 ENDIF 1577 IF ( sm_io%is_sm_active() ) THEN 1578 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, sm_io%comm_shared, ierr ) 2157 IF ( cyclic_fill_mode ) THEN !kk This may be the general solution for all cases 2158 IF ( pe_active_for_read ) THEN 2159 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2160 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2161 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data ), MPI_REAL, status, ierr ) 2162 ENDIF 2163 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, comm2d, ierr ) 2164 ELSE 2165 IF ( sm_io%iam_io_pe ) THEN 2166 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2167 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2168 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data ), MPI_REAL, status, ierr ) 2169 ENDIF 2170 IF ( sm_io%is_sm_active() ) THEN 2171 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, sm_io%comm_shared, ierr ) 2172 ENDIF 1579 2173 ENDIF 1580 2174 #else … … 1723 2317 !-- Set default view 1724 2318 #if defined( __parallel ) 1725 IF ( sm_io%iam_io_pe ) THEN 1726 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 1727 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 1728 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 1729 ENDIF 1730 IF ( sm_io%is_sm_active() ) THEN 1731 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 2319 IF ( cyclic_fill_mode ) THEN !kk This may be the general solution for all cases 2320 IF ( pe_active_for_read ) THEN 2321 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2322 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2323 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 2324 ENDIF 2325 CALL MPI_BCAST( data, SIZE( data ), MPI_REAL, 0, comm2d, ierr ) 2326 ELSE 2327 IF ( sm_io%iam_io_pe ) THEN 2328 CALL MPI_FILE_SET_VIEW( fh, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2329 CALL MPI_FILE_SEEK( fh, array_position, MPI_SEEK_SET, ierr ) 2330 CALL MPI_FILE_READ_ALL( fh, data, SIZE( data), MPI_INTEGER, status, ierr ) 2331 ENDIF 2332 IF ( sm_io%is_sm_active() ) THEN 2333 CALL MPI_BCAST( data, SIZE( data ), MPI_INTEGER, 0, sm_io%comm_shared, ierr ) 2334 ENDIF 1732 2335 ENDIF 1733 2336 #else … … 1976 2579 lo_first_index = 1 1977 2580 1978 IF ( MAXVAL( m_global_start ) == -1 ) RETURN ! 
Nothing to do on this PE1979 1980 2581 IF ( PRESENT( first_index ) ) THEN 1981 2582 lo_first_index = first_index … … 1996 2597 IF ( found ) THEN 1997 2598 1998 DO i = nxl, nxr 1999 DO j = nys, nyn 2000 2001 IF ( m_global_start(j,i) > 0 ) THEN 2002 disp = array_position+(m_global_start(j,i)-1) * wp 2003 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 2004 nr_bytes = nr_words * wp 2005 ENDIF 2006 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry 2007 disp_f = disp 2008 nr_bytes_f = 0 2009 i_f = i 2010 j_f = j 2011 ENDIF 2012 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry 2013 disp_n = -1 2014 IF ( nr_bytes > 0 ) THEN 2015 nr_bytes_f = nr_bytes_f+nr_bytes 2599 IF ( cyclic_fill_mode ) THEN 2600 2601 CALL rrd_mpi_io_surface_cyclic_fill 2602 2603 ELSE 2604 2605 IF ( MAXVAL( m_global_start ) == -1 ) RETURN ! Nothing to do on this PE 2606 DO i = nxl, nxr 2607 DO j = nys, nyn 2608 2609 IF ( m_global_start(j,i) > 0 ) THEN 2610 disp = array_position+(m_global_start(j,i)-1) * wp 2611 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 2612 nr_bytes = nr_words * wp 2016 2613 ENDIF 2017 ELSEIF ( j == nyn ) THEN ! Next x 2018 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN 2019 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp 2614 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry 2615 disp_f = disp 2616 nr_bytes_f = 0 2617 i_f = i 2618 j_f = j 2619 ENDIF 2620 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry 2621 disp_n = -1 2622 IF ( nr_bytes > 0 ) THEN 2623 nr_bytes_f = nr_bytes_f+nr_bytes 2624 ENDIF 2625 ELSEIF ( j == nyn ) THEN ! Next x 2626 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN 2627 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp 2628 ELSE 2629 CYCLE 2630 ENDIF 2020 2631 ELSE 2021 CYCLE 2632 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN 2633 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp 2634 ELSE 2635 CYCLE 2636 ENDIF 2022 2637 ENDIF 2023 ELSE 2024 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN 2025 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp 2026 ELSE 2027 CYCLE 2638 2639 2640 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block 2641 nr_bytes_f = nr_bytes_f + nr_bytes 2642 ELSE ! Read 2643 #if defined( __parallel ) 2644 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr ) 2645 nr_words = nr_bytes_f / wp 2646 CALL MPI_FILE_READ( fhs, data(m_start_index(j_f,i_f)), nr_words, MPI_REAL, status, & 2647 ierr ) 2648 #else 2649 CALL posix_lseek( fh, disp_f ) 2650 CALL posix_read( fh, data(m_start_index(j_f,i_f):), nr_bytes_f ) 2651 #endif 2652 disp_f = disp 2653 nr_bytes_f = nr_bytes 2654 i_f = i 2655 j_f = j 2028 2656 ENDIF 2029 ENDIF 2030 2031 2032 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block 2033 nr_bytes_f = nr_bytes_f + nr_bytes 2034 ELSE ! Read 2035 #if defined( __parallel ) 2036 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr ) 2037 nr_words = nr_bytes_f / wp 2038 CALL MPI_FILE_READ( fhs, data(m_start_index(j_f,i_f)), nr_words, MPI_REAL, status, & 2039 ierr ) 2040 #else 2041 CALL posix_lseek( fh, disp_f ) 2042 CALL posix_read( fh, data(m_start_index(j_f,i_f):), nr_bytes_f ) 2043 #endif 2044 disp_f = disp 2045 nr_bytes_f = nr_bytes 2046 i_f = i 2047 j_f = j 2048 ENDIF 2049 2657 2658 ENDDO 2050 2659 ENDDO 2051 ENDDO 2660 ENDIF 2661 2052 2662 2053 2663 ELSE … … 2064 2674 ! lo_first_index,nr_val, SUM( data(1:nr_val) ) 2065 2675 ! 
ENDIF 2676 2677 2678 CONTAINS 2679 2680 SUBROUTINE rrd_mpi_io_surface_cyclic_fill 2681 2682 IMPLICIT NONE 2683 2684 INTEGER(iwp) :: i !< 2685 INTEGER(iwp) :: ie !< 2686 INTEGER(iwp) :: ierr !< 2687 INTEGER(iwp) :: is !< 2688 INTEGER(iwp) :: i_remote !< 2689 INTEGER(iwp) :: j !< 2690 INTEGER(iwp) :: je !< 2691 INTEGER(iwp) :: js !< 2692 INTEGER(iwp) :: j_remote !< 2693 INTEGER(iwp) :: nval !< 2694 INTEGER(iwp) :: rem_pe !< 2695 2696 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_offs !< 2697 2698 LOGICAL :: write_done !< 2699 2700 2701 ! 2702 !-- In the current version, there is only 1 value per grid cell allowed. 2703 !-- In this special case, the cyclical repetition can be done with the same method as for 2d-real 2704 !-- array. 2705 CALL prerun_grid%activate_grid_from_this_class() 2706 2707 IF ( pe_active_for_read ) THEN 2708 rmabuf_2d = -1.0 2709 DO i = nxl, nxr 2710 DO j = nys, nyn 2711 2712 IF ( m_global_start(j,i) > 0 ) THEN 2713 disp = array_position+(m_global_start(j,i)-1) * wp 2714 nr_words = m_end_index(j,i)-m_start_index(j,i)+1 2715 nr_bytes = nr_words * wp 2716 ENDIF 2717 IF ( disp >= 0 .AND. disp_f == -1 ) THEN ! First entry 2718 disp_f = disp 2719 nr_bytes_f = 0 2720 write_done = .TRUE. 2721 ENDIF 2722 IF( write_done ) THEN 2723 i_f = i 2724 j_f = j 2725 write_done = .FALSE. 2726 ENDIF 2727 2728 IF ( j == nyn .AND. i == nxr ) THEN ! Last entry 2729 disp_n = -1 2730 IF ( nr_bytes > 0 ) THEN 2731 nr_bytes_f = nr_bytes_f+nr_bytes 2732 ENDIF 2733 ELSEIF ( j == nyn ) THEN ! Next x 2734 IF ( m_global_start(nys,i+1) > 0 .AND. disp > 0 ) THEN 2735 disp_n = array_position + ( m_global_start(nys,i+1) - 1 ) * wp 2736 ELSE 2737 CYCLE 2738 ENDIF 2739 ELSE 2740 IF ( m_global_start(j+1,i) > 0 .AND. disp > 0 ) THEN 2741 disp_n = array_position + ( m_global_start(j+1,i) - 1 ) * wp 2742 ELSE 2743 CYCLE 2744 ENDIF 2745 ENDIF 2746 2747 2748 IF ( disp + nr_bytes == disp_n ) THEN ! Contiguous block 2749 nr_bytes_f = nr_bytes_f + nr_bytes 2750 ELSE ! Read 2751 #if defined( __parallel ) 2752 CALL MPI_FILE_SEEK( fhs, disp_f, MPI_SEEK_SET, ierr ) 2753 nr_words = nr_bytes_f / wp 2754 CALL MPI_FILE_READ( fhs, rmabuf_2d(j_f,i_f), nr_words, MPI_REAL, status, ierr ) 2755 #else 2756 CALL posix_lseek( fh, disp_f ) 2757 CALL posix_read( fh, rmabuf_2d(j_f,i_f), nr_bytes_f ) 2758 #endif 2759 2760 disp_f = disp 2761 nr_bytes_f = nr_bytes 2762 write_done = .TRUE. 2763 ENDIF 2764 2765 ENDDO 2766 ENDDO 2767 2768 ENDIF 2769 2770 CALL mainrun_grid%activate_grid_from_this_class() 2771 2772 #if defined( __parallel ) 2773 ! 2774 !-- Close RMA window to allow remote access 2775 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 2776 #endif 2777 2778 IF ( .NOT. 
pe_active_for_read ) THEN 2779 2780 is = nxl 2781 ie = nxr 2782 js = nys 2783 je = nyn 2784 2785 ELSE 2786 2787 is = nxl 2788 ie = nxr 2789 js = prerun_grid%nys+1 2790 je = nyn 2791 2792 DO i = is, ie 2793 DO j = js, je 2794 i_remote = MOD(i,nx_on_file+1) 2795 j_remote = MOD(j,ny_on_file+1) 2796 rem_pe = remote_pe(i_remote,j_remote) 2797 rem_offs = rma_offset(i_remote,j_remote) 2798 nval = 1 2799 2800 #if defined( __parallel ) 2801 IF ( rem_pe /= myid ) THEN 2802 CALL MPI_GET( data(m_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, & 2803 MPI_REAL, rmawin_2d, ierr) 2804 ELSE 2805 data(m_start_index(j,i)) = rmabuf_2d(j_remote,i_remote) 2806 ENDIF 2807 #else 2808 data(m_start_index(j,i)) = array_2d(i_remote,j_remote) 2809 #endif 2810 ENDDO 2811 ENDDO 2812 is = prerun_grid%nxr+1 2813 ie = nxr 2814 js = nys 2815 je = nyn 2816 2817 ENDIF 2818 2819 DO i = is, ie 2820 DO j = js, je 2821 i_remote = MOD(i,nx_on_file+1) 2822 j_remote = MOD(j,ny_on_file+1) 2823 rem_pe = remote_pe(i_remote,j_remote) 2824 rem_offs = rma_offset(i_remote,j_remote) 2825 nval = 1 2826 2827 #if defined( __parallel ) 2828 IF ( rem_pe /= myid ) THEN 2829 CALL MPI_GET( data(m_start_index(j,i)), nval, MPI_REAL, rem_pe, rem_offs, nval, & 2830 MPI_REAL, rmawin_2d, ierr) 2831 ELSE 2832 data(m_start_index(j,i)) = rmabuf_2d(j_remote,i_remote) 2833 ENDIF 2834 #else 2835 data(m_tart_index(j,i)) = array_2d(i_remote,j_remote) 2836 #endif 2837 ENDDO 2838 ENDDO 2839 2840 #if defined( __parallel ) 2841 ! 2842 !-- Reopen RMA window to allow filling 2843 CALL MPI_WIN_FENCE( 0, rmawin_2d, ierr ) 2844 #endif 2845 2846 END SUBROUTINE rrd_mpi_io_surface_cyclic_fill 2066 2847 2067 2848 END SUBROUTINE rrd_mpi_io_surface … … 2254 3035 tgh%nr_real = header_real_index - 1 2255 3036 tgh%nr_arrays = header_array_index - 1 2256 tgh%total_nx = lb%nx + 12257 tgh%total_ny = lb%ny + 13037 tgh%total_nx = iog%nx + 1 3038 tgh%total_ny = iog%ny + 1 2258 3039 IF ( include_total_domain_boundaries ) THEN ! Not sure, if LOGICAL interpretation is the same for all compilers, 2259 3040 tgh%i_outer_bound = 1 ! therefore store as INTEGER in general header … … 2368 3149 ENDIF 2369 3150 #endif 2370 3151 ! 3152 !-- Free RMA windows 3153 IF ( cyclic_fill_mode ) THEN 3154 CALL MPI_WIN_FREE( rmawin_2di, ierr ) 3155 CALL MPI_WIN_FREE( rmawin_2d, ierr ) 3156 CALL MPI_WIN_FREE( rmawin_3d, ierr ) 3157 ENDIF 3158 3159 IF (.NOT. pe_active_for_read ) RETURN 3160 ! 3161 !-- TODO: better explain the following message 3162 !-- In case on non cyclic read, pe_active_for_read is set .TRUE. 2371 3163 IF ( sm_io%iam_io_pe ) THEN 2372 3164 … … 2402 3194 2403 3195 INTEGER(iwp) :: i !< loop index 3196 INTEGER(iwp) :: j !< loop index 2404 3197 INTEGER(KIND=rd_offset_kind) :: offset !< 2405 3198 … … 2412 3205 2413 3206 2414 INTEGER, INTENT(IN ), DIMENSION(nys:nyn,nxl:nxr) :: end_index !<2415 INTEGER, INTENT(OUT), DIMENSION(nys:nyn,nxl:nxr) :: global_start !<2416 INTEGER, INTENT(IN ), DIMENSION(nys:nyn,nxl:nxr) :: start_index !<3207 INTEGER, INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: end_index !< 3208 INTEGER, INTENT(OUT), DIMENSION(nys:nyn,nxl:nxr) :: global_start !< 3209 INTEGER, INTENT(INOUT), DIMENSION(nys:nyn,nxl:nxr) :: start_index !< 2417 3210 2418 3211 LOGICAL, INTENT(OUT) :: data_to_write !< returns, if surface data have to be written 2419 2420 2421 offset = 02422 lo_nr_val= 02423 lo_nr_val(myid) = MAXVAL( end_index )2424 #if defined( __parallel )2425 CALL MPI_ALLREDUCE( lo_nr_val, all_nr_val, numprocs, MPI_INTEGER, MPI_SUM, comm2d, ierr )2426 IF ( ft_surf /= -1 .AND. 
sm_io%iam_io_pe ) THEN2427 CALL MPI_TYPE_FREE( ft_surf, ierr ) ! If set, free last surface filetype2428 ENDIF2429 2430 IF ( win_surf /= -1 ) THEN2431 IF ( sm_io%is_sm_active() ) THEN2432 CALL MPI_WIN_FREE( win_surf, ierr )2433 ENDIF2434 win_surf = -12435 ENDIF2436 2437 IF ( sm_io%is_sm_active() .AND. rd_flag ) THEN2438 IF ( fhs == -1 ) THEN2439 CALL MPI_FILE_OPEN( comm2d, TRIM( io_file_name ), MPI_MODE_RDONLY, MPI_INFO_NULL, fhs, &2440 ierr )2441 ENDIF2442 ELSE2443 fhs = fh2444 ENDIF2445 #else2446 all_nr_val(myid) = lo_nr_val(myid)2447 #endif2448 nr_val = lo_nr_val(myid)2449 2450 total_number_of_surface_values = 02451 DO i = 0, numprocs-12452 IF ( i == myid ) THEN2453 glo_start = total_number_of_surface_values + 12454 ENDIF2455 total_number_of_surface_values = total_number_of_surface_values + all_nr_val(i)2456 ENDDO2457 3212 2458 3213 ! 2459 3214 !-- Actions during reading 2460 3215 IF ( rd_flag ) THEN 3216 ! 3217 !-- Set start index and end index for the mainrun grid. 3218 !-- ATTENTION: This works only for horizontal surfaces with one vale per grid cell!!! 3219 IF ( cyclic_fill_mode ) THEN 3220 DO i = nxl, nxr 3221 DO j = nys, nyn 3222 start_index (j,i) = (i-nxl) * nny + j - nys + 1 3223 end_index (j,i) = start_index(j,i) 3224 ENDDO 3225 ENDDO 3226 ENDIF 3227 2461 3228 IF ( .NOT. ALLOCATED( m_start_index ) ) ALLOCATE( m_start_index(nys:nyn,nxl:nxr) ) 2462 3229 IF ( .NOT. ALLOCATED( m_end_index ) ) ALLOCATE( m_end_index(nys:nyn,nxl:nxr) ) … … 2469 3236 nr_val = MAXVAL( end_index ) 2470 3237 3238 ENDIF 3239 3240 IF ( .NOT. pe_active_for_read ) RETURN 3241 3242 IF ( cyclic_fill_mode ) CALL prerun_grid%activate_grid_from_this_class() 3243 3244 offset = 0 3245 lo_nr_val= 0 3246 lo_nr_val(myid) = MAXVAL( end_index ) 3247 #if defined( __parallel ) 3248 CALL MPI_ALLREDUCE( lo_nr_val, all_nr_val, numprocs, MPI_INTEGER, MPI_SUM, comm2d, ierr ) 3249 IF ( ft_surf /= -1 .AND. sm_io%iam_io_pe ) THEN 3250 CALL MPI_TYPE_FREE( ft_surf, ierr ) ! If set, free last surface filetype 3251 ENDIF 3252 3253 IF ( win_surf /= -1 ) THEN 3254 IF ( sm_io%is_sm_active() ) THEN 3255 CALL MPI_WIN_FREE( win_surf, ierr ) 3256 ENDIF 3257 win_surf = -1 3258 ENDIF 3259 3260 IF ( sm_io%is_sm_active() .AND. rd_flag ) THEN 3261 IF ( fhs == -1 ) THEN 3262 CALL MPI_FILE_OPEN( comm2d, TRIM( io_file_name ), MPI_MODE_RDONLY, MPI_INFO_NULL, fhs, & 3263 ierr ) 3264 ENDIF 3265 ELSE 3266 fhs = fh 3267 ENDIF 3268 #else 3269 all_nr_val(myid) = lo_nr_val(myid) 3270 #endif 3271 nr_val = lo_nr_val(myid) 3272 3273 total_number_of_surface_values = 0 3274 DO i = 0, numprocs-1 3275 IF ( i == myid ) THEN 3276 glo_start = total_number_of_surface_values + 1 3277 ENDIF 3278 total_number_of_surface_values = total_number_of_surface_values + all_nr_val(i) 3279 ENDDO 3280 3281 ! 3282 !-- Actions during reading 3283 IF ( rd_flag ) THEN 3284 2471 3285 #if defined( __parallel ) 2472 3286 CALL MPI_FILE_SET_VIEW( fhs, offset, MPI_BYTE, MPI_BYTE, 'native', MPI_INFO_NULL, ierr ) 2473 3287 #endif 2474 3288 ENDIF 3289 3290 IF ( cyclic_fill_mode ) CALL mainrun_grid%activate_grid_from_this_class() 2475 3291 2476 3292 ! … … 2563 3379 INTEGER, DIMENSION(3) :: start3 !< 2564 3380 2565 TYPE( local_boundaries) :: save_io_grid !< temporary variable to store grid settings3381 TYPE(domain_decomposition_grid_features) :: save_io_grid !< temporary variable to store grid settings 2566 3382 2567 3383 … … 2569 3385 save_io_grid = sm_io%io_grid 2570 3386 ENDIF 3387 3388 IF( .NOT. 
pe_active_for_read ) RETURN 3389 3390 IF ( cyclic_fill_mode ) CALL prerun_grid%activate_grid_from_this_class() 2571 3391 2572 3392 ! … … 2576 3396 IF ( include_total_domain_boundaries ) THEN 2577 3397 2578 lb%nxl = nxl + nbgp2579 lb%nxr = nxr + nbgp2580 lb%nys = nys + nbgp2581 lb%nyn = nyn + nbgp2582 lb%nnx = nnx2583 lb%nny = nny2584 lb%nx = nx + 2 * nbgp2585 lb%ny = ny + 2 * nbgp3398 iog%nxl = nxl + nbgp 3399 iog%nxr = nxr + nbgp 3400 iog%nys = nys + nbgp 3401 iog%nyn = nyn + nbgp 3402 iog%nnx = nnx 3403 iog%nny = nny 3404 iog%nx = nx + 2 * nbgp 3405 iog%ny = ny + 2 * nbgp 2586 3406 IF ( myidx == 0 ) THEN 2587 lb%nxl = lb%nxl - nbgp2588 lb%nnx = lb%nnx + nbgp3407 iog%nxl = iog%nxl - nbgp 3408 iog%nnx = iog%nnx + nbgp 2589 3409 ENDIF 2590 3410 IF ( myidx == npex-1 .OR. npex == -1 ) THEN ! npex == 1 if -D__parallel not set 2591 lb%nxr = lb%nxr + nbgp2592 lb%nnx = lb%nnx + nbgp3411 iog%nxr = iog%nxr + nbgp 3412 iog%nnx = iog%nnx + nbgp 2593 3413 ENDIF 2594 3414 IF ( myidy == 0 ) THEN 2595 lb%nys = lb%nys - nbgp2596 lb%nny = lb%nny + nbgp3415 iog%nys = iog%nys - nbgp 3416 iog%nny = iog%nny + nbgp 2597 3417 ENDIF 2598 3418 IF ( myidy == npey-1 .OR. npey == -1 ) THEN ! npey == 1 if -D__parallel not set 2599 lb%nyn = lb%nyn + nbgp2600 lb%nny = lb%nny + nbgp3419 iog%nyn = iog%nyn + nbgp 3420 iog%nny = iog%nny + nbgp 2601 3421 ENDIF 2602 3422 … … 2605 3425 ELSE 2606 3426 2607 lb%nxl = nxl2608 lb%nxr = nxr2609 lb%nys = nys2610 lb%nyn = nyn2611 lb%nnx = nnx2612 lb%nny = nny2613 lb%nx = nx2614 lb%ny = ny3427 iog%nxl = nxl 3428 iog%nxr = nxr 3429 iog%nys = nys 3430 iog%nyn = nyn 3431 iog%nnx = nnx 3432 iog%nny = nny 3433 iog%nx = nx 3434 iog%ny = ny 2615 3435 2616 3436 ENDIF … … 2626 3446 #endif 2627 3447 ELSE 2628 ALLOCATE( array_2d( lb%nxl:lb%nxr,lb%nys:lb%nyn) )3448 ALLOCATE( array_2d(iog%nxl:iog%nxr,iog%nys:iog%nyn) ) 2629 3449 ALLOCATE( array_2di(nxl:nxr,nys:nyn) ) 2630 ALLOCATE( array_3d(nzb:nzt+1, lb%nxl:lb%nxr,lb%nys:lb%nyn) )2631 sm_io%io_grid = lb3450 ALLOCATE( array_3d(nzb:nzt+1,iog%nxl:iog%nxr,iog%nys:iog%nyn) ) 3451 sm_io%io_grid = iog 2632 3452 ENDIF 2633 3453 2634 3454 ! 2635 3455 !-- Create filetype for 2d-REAL array with ghost layers around the total domain 2636 dims2(1) = lb%nx + 12637 dims2(2) = lb%ny + 13456 dims2(1) = iog%nx + 1 3457 dims2(2) = iog%ny + 1 2638 3458 2639 3459 lize2(1) = sm_io%io_grid%nnx … … 2683 3503 !-- Create filetype for 3d-REAL array 2684 3504 dims3(1) = nz + 2 2685 dims3(2) = lb%nx + 12686 dims3(3) = lb%ny + 13505 dims3(2) = iog%nx + 1 3506 dims3(3) = iog%ny + 1 2687 3507 2688 3508 lize3(1) = dims3(1) … … 2701 3521 ENDIF 2702 3522 #endif 3523 3524 IF ( cyclic_fill_mode ) CALL mainrun_grid%activate_grid_from_this_class() 2703 3525 2704 3526 END SUBROUTINE rd_mpi_io_create_filetypes … … 2730 3552 win_3ds ) 2731 3553 ELSE 2732 ALLOCATE( array_3d_soil(nzb_soil:nzt_soil, lb%nxl:lb%nxr,lb%nys:lb%nyn) )2733 sm_io%io_grid = lb3554 ALLOCATE( array_3d_soil(nzb_soil:nzt_soil,iog%nxl:iog%nxr,iog%nys:iog%nyn) ) 3555 sm_io%io_grid = iog 2734 3556 ENDIF 2735 3557 … … 2737 3559 !-- Create filetype for 3d-soil array 2738 3560 dims3(1) = nzt_soil - nzb_soil + 1 2739 dims3(2) = lb%nx + 12740 dims3(3) = lb%ny + 13561 dims3(2) = iog%nx + 1 3562 dims3(3) = iog%ny + 1 2741 3563 2742 3564 lize3(1) = dims3(1) -
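The large block above adds the cyclic fill mode to restart_data_mpi_io_mod.f90. All of the new *_cyclic_fill reading routines follow the same one-sided communication pattern: the PEs that cover the (smaller) prerun domain read their part of the file and expose it in an MPI RMA window (rmabuf_2d, rmabuf_2di, rmabuf_3d), and every PE of the main run then fetches the value for grid point (j,i) from the owner of (MOD(j,ny_on_file+1), MOD(i,nx_on_file+1)) with MPI_GET, bracketed by MPI_WIN_FENCE calls. The self-contained sketch below reduces this mechanism to a 1-d toy problem; prerun_size, mainrun_size and the use of rank 0 as the only "prerun" PE are illustrative assumptions, not PALM code.

PROGRAM cyclic_fill_rma_sketch
!-- Sketch of the one-sided (RMA) access pattern used for cyclic fill: rank 0 plays the role
!-- of a prerun PE exposing its data in an MPI window, and every rank fills its own, larger
!-- array by cyclically repeating that data via MPI_GET.
   USE mpi
   IMPLICIT NONE

   INTEGER, PARAMETER :: dp = KIND( 1.0d0 )
   INTEGER, PARAMETER :: prerun_size  = 4    !< size of the (small) prerun domain
   INTEGER, PARAMETER :: mainrun_size = 10   !< size of the (larger) main run domain

   INTEGER :: i         !< grid index of the main run
   INTEGER :: i_remote  !< cyclically mapped index on the prerun grid
   INTEGER :: ierr, myid, win

   INTEGER(KIND=MPI_ADDRESS_KIND) :: disp     !< displacement inside the remote window
   INTEGER(KIND=MPI_ADDRESS_KIND) :: winsize  !< window size in bytes

   REAL(dp) :: prerun_data(0:prerun_size-1)    !< data exposed via the RMA window
   REAL(dp) :: mainrun_data(0:mainrun_size-1)  !< array to be filled cyclically

   CALL MPI_INIT( ierr )
   CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr )

!-- Only the "prerun" PE (rank 0 in this sketch) holds meaningful data in its window
   IF ( myid == 0 )  THEN
      DO  i = 0, prerun_size-1
         prerun_data(i) = REAL( i, dp )
      ENDDO
   ELSE
      prerun_data = -1.0_dp
   ENDIF

   winsize = prerun_size * 8_MPI_ADDRESS_KIND
   CALL MPI_WIN_CREATE( prerun_data, winsize, 8, MPI_INFO_NULL, MPI_COMM_WORLD, win, ierr )

!-- First fence opens the access epoch for the one-sided reads
   CALL MPI_WIN_FENCE( 0, win, ierr )

   DO  i = 0, mainrun_size-1
      i_remote = MOD( i, prerun_size )   ! cyclic mapping, analogous to MOD( i, nx_on_file+1 )
      disp     = i_remote
      CALL MPI_GET( mainrun_data(i), 1, MPI_DOUBLE_PRECISION, 0, disp, 1,                   &
                    MPI_DOUBLE_PRECISION, win, ierr )
   ENDDO

!-- Second fence completes all pending MPI_GET operations
   CALL MPI_WIN_FENCE( 0, win, ierr )

   IF ( myid == 0 )  WRITE( *, '(A,10F5.1)' )  'cyclically filled: ', mainrun_data

   CALL MPI_WIN_FREE( win, ierr )
   CALL MPI_FINALIZE( ierr )

END PROGRAM cyclic_fill_rma_sketch

Compiled with an MPI Fortran wrapper and run on several ranks, every rank ends up with the prerun values repeated cyclically over its larger array, which is what the 2d/2di/3d routines of the changeset do per grid column of the main run.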
palm/trunk/SOURCE/shared_memory_io_mod.f90
r4591 r4617 25 25 ! $Id$ 26 26 ! 27 ! Additions for cyclic fill mode 28 ! 29 ! 27 30 ! File re-formatted to follow the PALM coding standard 28 31 ! 29 32 ! 30 !31 33 ! Initial version (Klaus Ketelsen) 32 34 ! … … 76 78 77 79 USE kinds, & 78 ONLY: iwp, & 80 ONLY: dp, & 81 iwp, & 82 sp, & 79 83 wp 80 81 82 USE transpose_indices, &83 ONLY: nxl_z, &84 nxr_z, &85 nyn_x, &86 nyn_z, &87 nys_x, &88 nys_z89 90 91 84 92 85 USE pegrid, & … … 121 114 122 115 ! 123 !-- Type to store grid information 124 TYPE, PUBLIC :: local_boundaries !< 125 126 INTEGER(iwp) :: nnx !< 127 INTEGER(iwp) :: nny !< 128 INTEGER(iwp) :: nx !< 129 INTEGER(iwp) :: nxl !< 130 INTEGER(iwp) :: nxr !< 131 INTEGER(iwp) :: ny !< 132 INTEGER(iwp) :: nyn !< 133 INTEGER(iwp) :: nys !< 134 135 136 137 138 END TYPE local_boundaries 116 !-- Type to store information about the domain decomposition grid 117 TYPE, PUBLIC :: domain_decomposition_grid_features !< 118 119 INTEGER(iwp) :: comm2d !< 120 INTEGER(iwp) :: myid !< 121 INTEGER(iwp) :: nnx !< 122 INTEGER(iwp) :: nny !< 123 INTEGER(iwp) :: nx !< 124 INTEGER(iwp) :: nxl !< 125 INTEGER(iwp) :: nxr !< 126 INTEGER(iwp) :: ny !< 127 INTEGER(iwp) :: nyn !< 128 INTEGER(iwp) :: nys !< 129 INTEGER(iwp) :: numprocs !< 130 131 CONTAINS 132 133 PROCEDURE, PASS(this), PUBLIC :: activate_grid_from_this_class 134 PROCEDURE, PASS(this), PUBLIC :: save_grid_into_this_class 135 136 END TYPE domain_decomposition_grid_features 139 137 140 138 ! … … 145 143 INTEGER(iwp) :: nr_io_pe_per_node = 2 !< typical configuration, 2 sockets per node 146 144 LOGICAL :: no_shared_Memory_in_this_run !< 145 146 INTEGER(iwp) :: comm_model !< communicator of this model run 147 147 ! 148 148 !-- Variables for the shared memory communicator 149 INTEGER(iwp), PUBLIC :: comm_shared !< Communicator for processes with shared array149 INTEGER(iwp), PUBLIC :: comm_shared !< communicator for processes with shared array 150 150 INTEGER(iwp), PUBLIC :: sh_npes !< 151 151 INTEGER(iwp), PUBLIC :: sh_rank !< … … 157 157 INTEGER(iwp), PUBLIC :: io_npes !< 158 158 INTEGER(iwp), PUBLIC :: io_rank !< 159 160 TYPE( local_boundaries ), PUBLIC :: io_grid161 162 159 ! 
163 160 !-- Variables for the node local communicator … … 167 164 INTEGER(iwp) :: n_rank !< 168 165 169 CONTAINS 170 171 PRIVATE 172 173 PROCEDURE, PASS(this), PUBLIC :: is_sm_active !< 174 PROCEDURE, PASS(this), PUBLIC :: sm_adjust_outer_boundary !< 175 PROCEDURE, PASS(this), PUBLIC :: sm_free_shared !< 176 PROCEDURE, PASS(this), PUBLIC :: sm_init_comm !< 177 PROCEDURE, PASS(this), PUBLIC :: sm_node_barrier !< 166 TYPE(domain_decomposition_grid_features), PUBLIC :: io_grid !< io grid features, depending on reading from prerun or restart run 167 168 169 CONTAINS 170 171 PRIVATE 172 173 PROCEDURE, PASS(this), PUBLIC :: is_sm_active 174 PROCEDURE, PASS(this), PUBLIC :: sm_adjust_outer_boundary 175 PROCEDURE, PASS(this), PUBLIC :: sm_free_shared 176 PROCEDURE, PASS(this), PUBLIC :: sm_init_comm 177 PROCEDURE, PASS(this), PUBLIC :: sm_init_part 178 PROCEDURE, PASS(this), PUBLIC :: sm_node_barrier 178 179 #if defined( __parallel ) 179 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d !< 180 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d !< 181 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2di !< 182 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d !< 183 184 GENERIC, PUBLIC :: sm_allocate_shared => sm_allocate_shared_1d, sm_allocate_shared_2d, & 185 sm_allocate_shared_2di, sm_allocate_shared_3d !< 180 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_64 181 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_32 182 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1di 183 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_64 184 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_32 185 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2di 186 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_64 187 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_32 188 189 GENERIC, PUBLIC :: sm_allocate_shared => & 190 sm_allocate_shared_1d_64, sm_allocate_shared_1d_32, & 191 sm_allocate_shared_2d_64, sm_allocate_shared_2d_32, & 192 sm_allocate_shared_2di, sm_allocate_shared_3d_64, & 193 sm_allocate_shared_3d_32, sm_allocate_shared_1di 186 194 #endif 187 195 END TYPE sm_class … … 197 205 !> Setup the grid for shared memory IO. 198 206 !--------------------------------------------------------------------------------------------------! 199 SUBROUTINE sm_init_comm( this, sm_active ) 200 201 IMPLICIT NONE 202 203 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 207 SUBROUTINE sm_init_comm( this, sm_active, comm_input ) 208 209 IMPLICIT NONE 210 211 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 212 INTEGER, INTENT(IN), OPTIONAL :: comm_input !< main model communicator (comm2d) can optional be set 204 213 205 214 #if defined( __parallel ) 206 INTEGER :: color !<207 INTEGER :: max_n_npes !< Maximum number of PEs/node215 INTEGER :: color 216 INTEGER :: max_n_npes !< maximum number of PEs/node 208 217 #endif 209 218 210 LOGICAL, INTENT(IN) :: sm_active !< Flag to activate shared-memory IO 211 219 LOGICAL, INTENT(IN) :: sm_active !< flag to activate shared-memory IO 220 221 IF ( PRESENT( comm_input ) ) THEN 222 this%comm_model = comm_input 223 ELSE 224 this%comm_model = comm2d 225 ENDIF 212 226 213 227 this%no_shared_memory_in_this_run = .NOT. sm_active 228 this%comm_io = this%comm_model ! 
preset in case of non shared-memory-IO 214 229 215 230 IF ( this%no_shared_memory_in_this_run ) THEN … … 222 237 !-- Determine, how many MPI threads are running on a node 223 238 this%iam_io_pe = .FALSE. 224 CALL MPI_COMM_SPLIT_TYPE( comm2d, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, this%comm_node, ierr ) 239 CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, & 240 this%comm_node, ierr ) 225 241 CALL MPI_COMM_SIZE( this%comm_node, this%n_npes, ierr ) 226 242 CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr ) 227 243 228 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, comm2d, ierr )244 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr ) 229 245 ! 230 246 !-- Decide, if the configuration can run with shared-memory IO … … 267 283 !-- All threads with shared memory rank 0 will be I/O threads. 268 284 color = this%sh_rank 269 CALL MPI_COMM_SPLIT( comm2d, color, 0, this%comm_io, ierr )285 CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr ) 270 286 271 287 IF ( this%comm_io /= MPI_COMM_NULL ) THEN … … 287 303 #endif 288 304 305 ! write(9,'(a,8i7)') ' end of sm_init_comm ',this%sh_rank,this%sh_npes,this%io_rank,this%io_npes,this%io_pe_global_rank 306 ! write(9,*) 'This process is IO Process ',this%iam_io_pe 307 289 308 #if defined( __parallel ) 290 309 CONTAINS … … 305 324 INTEGER(iwp), DIMENSION(4,0:this%n_npes-1) :: local_dim_r !< 306 325 307 TYPE( local_boundaries), DIMENSION(32) :: node_grid !<326 TYPE(domain_decomposition_grid_features), DIMENSION(32) :: node_grid !< 308 327 309 328 ! … … 381 400 382 401 402 ! 403 !-- TODO: short description required, about the meaning of the following routine 404 !-- part must be renamed particles! 405 SUBROUTINE sm_init_part( this ) 406 407 IMPLICIT NONE 408 409 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 410 411 #if defined( __parallel ) 412 INTEGER(iwp) :: color !< 413 INTEGER(iwp) :: comm_shared_base !< 414 INTEGER(iwp) :: ierr !< 415 INTEGER(iwp) :: max_n_npes !< maximum number of PEs/node 416 417 LOGICAL :: sm_active !< 418 #endif 419 420 421 sm_active = .TRUE. ! particle IO always uses shared memory 422 this%comm_model = comm2d 423 424 this%no_shared_memory_in_this_run = .NOT. sm_active 425 this%comm_io = this%comm_model ! preset in case of non shared-memory-IO 426 427 IF ( this%no_shared_memory_in_this_run ) THEN 428 this%iam_io_pe = .TRUE. 429 RETURN 430 ENDIF 431 432 #if defined( __parallel ) 433 ! 434 !-- Determine, how many MPI threads are running on a node 435 this%iam_io_pe = .FALSE. 436 CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, & 437 this%comm_node, ierr ) 438 CALL MPI_COMM_SIZE( this%comm_node, this%n_npes, ierr ) 439 CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr ) 440 441 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr ) 442 443 ! 444 !-- TODO: better explanation 445 !-- It has to be testet, if using memory blocks for an IO process (MPI shared Memory), or if it is 446 !-- even better to use the complete node for MPI shared memory (this%nr_io_pe_per_node = 1). 447 !- In the latter case, the access to the MPI shared memory buffer is slower, the number of 448 !-- particles to move between threads will be much smaller. 449 IF ( max_n_npes > 64 ) THEN 450 ! 
451 !-- Special configuration on the HLRN-IV system with 4 shared memory blocks/node 452 this%nr_io_pe_per_node = 4 453 ENDIF 454 455 IF ( this%nr_io_pe_per_node == 1 ) THEN 456 ! 457 !-- This branch is not realized so far 458 this%iam_io_pe = ( this%n_rank == 0 ) 459 this%comm_shared = this%comm_node 460 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 461 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 462 463 ELSEIF( this%nr_io_pe_per_node == 2 ) THEN 464 465 this%iam_io_pe = ( this%n_rank == 0 .OR. this%n_rank == this%n_npes/2 ) 466 IF ( this%n_rank < this%n_npes/2 ) THEN 467 color = 1 468 ELSE 469 color = 2 470 ENDIF 471 CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr ) 472 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 473 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 474 475 ELSEIF( this%nr_io_pe_per_node == 4 ) THEN 476 477 this%iam_io_pe = ( this%n_rank == 0 .OR. this%n_rank == this%n_npes/4 .OR. & 478 this%n_rank == this%n_npes/2 .OR. this%n_rank == (3*this%n_npes)/4 ) 479 IF ( this%n_rank < this%n_npes/4 ) THEN 480 color = 1 481 ELSEIF( this%n_rank < this%n_npes/2 ) THEN 482 color = 2 483 ELSEIF( this%n_rank < (3*this%n_npes)/4 ) THEN 484 color = 3 485 ELSE 486 color = 4 487 ENDIF 488 CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr ) 489 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 490 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 491 492 ELSE 493 494 WRITE( *, * ) 'shared_memory_io_mod: internal error' 495 WRITE( *, * ) 'only 2 or 4 shared memory groups per node are allowed' 496 STOP 497 498 ENDIF 499 500 ! 501 !-- Setup the shared memory area 502 CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr ) 503 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 504 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 505 506 ! 507 !-- Setup the communicator across the nodes depending on the shared memory rank. 508 !-- All threads with shared memory rank 0 will be I/O threads. 509 color = this%sh_rank 510 CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr ) 511 512 IF ( this%comm_io /= MPI_COMM_NULL ) THEN 513 CALL MPI_COMM_SIZE( this%comm_io, this%io_npes, ierr ) 514 CALL MPI_COMM_RANK( this%comm_io, this%io_rank, ierr ) 515 ELSE 516 this%io_npes = -1 517 this%io_rank = -1 518 ENDIF 519 520 IF ( this%sh_rank == 0 ) THEN 521 this%iam_io_pe = .TRUE. 522 this%io_pe_global_rank = myid 523 ENDIF 524 CALL MPI_BCAST( this%io_pe_global_rank, 1, MPI_INTEGER, 0, this%comm_shared, ierr ) 525 526 #else 527 this%iam_io_pe = .FALSE. 528 #endif 529 530 ! write(9,'(a,8i7)') 'sm_init_comm_part ',this%sh_rank,this%sh_npes,this%io_rank,this%io_npes 531 532 END SUBROUTINE sm_init_part 383 533 384 534 !--------------------------------------------------------------------------------------------------! … … 401 551 402 552 #if defined( __parallel ) 403 !--------------------------------------------------------------------------------------------------! 404 ! Description: 405 ! ------------ 406 !> Allocate shared 1d-REAL array on ALL threads 407 !--------------------------------------------------------------------------------------------------! 
408 SUBROUTINE sm_allocate_shared_1d( this, p1, d1, d2, win ) 409 410 IMPLICIT NONE 411 412 CLASS(sm_class), INTENT(inout) :: this !< 413 !< 414 INTEGER(iwp) :: disp_unit !< 415 INTEGER(iwp), INTENT(IN) :: d1 !< 416 INTEGER(iwp), INTENT(IN) :: d2 !< 417 INTEGER(iwp), SAVE :: pe_from = 0 !< 418 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 419 INTEGER(iwp), INTENT(OUT) :: win !< 420 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 421 422 INTEGER, DIMENSION(1) :: buf_shape !< 423 424 REAL(wp), DIMENSION(:), POINTER :: buf !< 425 REAL(wp), DIMENSION(:), POINTER :: p1 !< 426 427 TYPE(C_PTR), SAVE :: base_ptr !< 428 TYPE(C_PTR), SAVE :: rem_ptr !< 553 554 !--------------------------------------------------------------------------------------------------! 555 ! Description: 556 ! ------------ 557 !> Allocate shared 1d-REAL (64 Bit) array on ALL threads 558 !--------------------------------------------------------------------------------------------------! 559 SUBROUTINE sm_allocate_shared_1d_64( this, p1, d1, d2, win ) 560 561 IMPLICIT NONE 562 563 CLASS(sm_class), INTENT(inout) :: this 564 565 INTEGER(iwp) :: disp_unit 566 INTEGER(iwp), INTENT(IN) :: d1 567 INTEGER(iwp), INTENT(IN) :: d2 568 INTEGER(iwp), SAVE :: pe_from = 0 569 INTEGER(iwp), INTENT(OUT) :: win 570 571 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 572 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 573 574 INTEGER, DIMENSION(1) :: buf_shape 575 576 REAL(dp), DIMENSION(:), POINTER :: buf 577 REAL(dp), DIMENSION(:), POINTER :: p1 578 579 TYPE(C_PTR), SAVE :: base_ptr 580 TYPE(C_PTR), SAVE :: rem_ptr 429 581 430 582 … … 437 589 wsize = 1 438 590 ENDIF 439 wsize = wsize * 8 ! Please note, size is always in bytes, independently of the displacement440 ! unit441 442 CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr )591 wsize = wsize * dp ! please note, size is always in bytes, independently of the displacement 592 ! unit 593 594 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr ) 443 595 ! 444 596 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) … … 453 605 pe_from = MOD( pe_from, this%sh_npes ) 454 606 455 END SUBROUTINE sm_allocate_shared_1d 456 457 458 !--------------------------------------------------------------------------------------------------! 459 ! Description: 460 ! ------------ 461 !> Allocate shared 2d-REAL array on ALL threads 462 !--------------------------------------------------------------------------------------------------! 463 SUBROUTINE sm_allocate_shared_2d( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) 464 465 IMPLICIT NONE 466 467 CLASS(sm_class), INTENT(INOUT) :: this !< 468 469 INTEGER(iwp) :: disp_unit !< 470 INTEGER(iwp), INTENT(IN) :: n_nxlg !< 471 INTEGER(iwp), INTENT(IN) :: n_nxrg !< 472 INTEGER(iwp), INTENT(IN) :: n_nyng !< 473 INTEGER(iwp), INTENT(IN) :: n_nysg !< 474 INTEGER(iwp), SAVE :: pe_from = 0 !< 475 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 476 INTEGER(iwp), INTENT(OUT) :: win !< 477 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 478 479 INTEGER(iwp), DIMENSION(2) :: buf_shape !< 480 481 REAL(wp), DIMENSION(:,:), POINTER :: buf !< 482 REAL(wp), DIMENSION(:,:), POINTER :: p2 !< 483 484 TYPE(C_PTR),SAVE :: base_ptr !< 485 TYPE(C_PTR),SAVE :: rem_ptr !< 607 END SUBROUTINE sm_allocate_shared_1d_64 608 609 610 !--------------------------------------------------------------------------------------------------! 611 ! Description: 612 ! 
------------ 613 !> Allocate shared 1d-REAL (32 Bit) array on ALL threads 614 !--------------------------------------------------------------------------------------------------! 615 SUBROUTINE sm_allocate_shared_1d_32( this, p1, d1, d2, win ) 616 617 IMPLICIT NONE 618 619 CLASS(sm_class), INTENT(inout) :: this 620 621 INTEGER(iwp) :: disp_unit 622 INTEGER(iwp), INTENT(IN) :: d1 623 INTEGER(iwp), INTENT(IN) :: d2 624 INTEGER(iwp), SAVE :: pe_from = 0 625 INTEGER(iwp), INTENT(OUT) :: win 626 627 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 628 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 629 630 INTEGER, DIMENSION(1) :: buf_shape 631 632 REAL(sp), DIMENSION(:), POINTER :: buf 633 REAL(sp), DIMENSION(:), POINTER :: p1 634 635 TYPE(C_PTR), SAVE :: base_ptr 636 TYPE(C_PTR), SAVE :: rem_ptr 637 638 639 IF ( this%no_shared_memory_in_this_run ) RETURN 640 ! 641 !-- Allocate shared memory on node rank 0 threads. 642 IF ( this%sh_rank == pe_from ) THEN 643 wsize = d2 - d1 + 1 644 ELSE 645 wsize = 1 646 ENDIF 647 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 648 ! unit 649 650 CALL MPI_WIN_ALLOCATE_SHARED( wsize, sp, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr ) 651 ! 652 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 653 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 654 ! 655 !-- Convert C- to Fortran-pointer 656 buf_shape(1) = d2 - d1 + 1 657 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 658 p1(d1:) => buf 659 ! 660 !-- Allocate shared memory in round robin on all PEs of a node. 661 pe_from = MOD( pe_from, this%sh_npes ) 662 663 END SUBROUTINE sm_allocate_shared_1d_32 664 665 666 !--------------------------------------------------------------------------------------------------! 667 ! Description: 668 ! ------------ 669 !> Allocate shared 1d-INTEGER array on ALL threads 670 !--------------------------------------------------------------------------------------------------! 671 SUBROUTINE sm_allocate_shared_1di( this, p1, d1, d2, win ) 672 673 IMPLICIT NONE 674 675 CLASS(sm_class), INTENT(inout) :: this 676 677 INTEGER(iwp) :: disp_unit 678 INTEGER(iwp), INTENT(IN) :: d1 679 INTEGER(iwp), INTENT(IN) :: d2 680 INTEGER(iwp), SAVE :: pe_from = 0 681 INTEGER(iwp), INTENT(OUT) :: win 682 683 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 684 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 685 686 INTEGER, DIMENSION(1) :: buf_shape 687 688 INTEGER(iwp), DIMENSION(:), POINTER :: buf 689 INTEGER(iwp), DIMENSION(:), POINTER :: p1 690 691 TYPE(C_PTR), SAVE :: base_ptr 692 TYPE(C_PTR), SAVE :: rem_ptr 693 694 695 IF ( this%no_shared_memory_in_this_run ) RETURN 696 ! 697 !-- Allocate shared memory on node rank 0 threads. 698 IF ( this%sh_rank == pe_from ) THEN 699 wsize = d2 - d1 + 1 700 ELSE 701 wsize = 1 702 ENDIF 703 wsize = wsize * iwp ! Please note, size is always in bytes, independently of the displacement 704 ! unit 705 706 CALL MPI_WIN_ALLOCATE_SHARED( wsize, iwp, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr ) 707 ! 708 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 709 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 710 ! 711 !-- Convert C- to Fortran-pointer 712 buf_shape(1) = d2 - d1 + 1 713 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 714 p1(d1:) => buf 715 ! 716 !-- Allocate shared memory in round robin on all PEs of a node. 
717 pe_from = MOD( pe_from, this%sh_npes ) 718 719 END SUBROUTINE sm_allocate_shared_1di 720 721 722 !--------------------------------------------------------------------------------------------------! 723 ! Description: 724 ! ------------ 725 !> Allocate shared 2d-REAL array on ALL threads (64 Bit) 726 !--------------------------------------------------------------------------------------------------! 727 SUBROUTINE sm_allocate_shared_2d_64( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) 728 729 IMPLICIT NONE 730 731 CLASS(sm_class), INTENT(INOUT) :: this 732 733 INTEGER(iwp) :: disp_unit 734 INTEGER(iwp), INTENT(IN) :: n_nxlg 735 INTEGER(iwp), INTENT(IN) :: n_nxrg 736 INTEGER(iwp), INTENT(IN) :: n_nyng 737 INTEGER(iwp), INTENT(IN) :: n_nysg 738 INTEGER(iwp), SAVE :: pe_from = 0 739 INTEGER(iwp), INTENT(OUT) :: win 740 741 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 742 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 743 744 INTEGER(iwp), DIMENSION(2) :: buf_shape 745 746 REAL(dp), DIMENSION(:,:), POINTER :: buf 747 REAL(dp), DIMENSION(:,:), POINTER :: p2 748 749 TYPE(C_PTR), SAVE :: base_ptr 750 TYPE(C_PTR), SAVE :: rem_ptr 486 751 487 752 … … 495 760 ENDIF 496 761 497 wsize = wsize * 8! Please note, size is always in bytes, independently of the displacement498 ! unit762 wsize = wsize * dp ! Please note, size is always in bytes, independently of the displacement 763 ! unit 499 764 500 765 CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) … … 512 777 pe_from = MOD( pe_from, this%sh_npes ) 513 778 514 END SUBROUTINE sm_allocate_shared_2d 779 END SUBROUTINE sm_allocate_shared_2d_64 780 781 782 !--------------------------------------------------------------------------------------------------! 783 ! Description: 784 ! ------------ 785 !> Allocate shared 2d-REAL (32 Bit) array on ALL threads 786 !--------------------------------------------------------------------------------------------------! 787 SUBROUTINE sm_allocate_shared_2d_32( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) 788 789 IMPLICIT NONE 790 791 CLASS(sm_class), INTENT(INOUT) :: this 792 793 INTEGER(iwp) :: disp_unit 794 INTEGER(iwp), INTENT(IN) :: n_nxlg 795 INTEGER(iwp), INTENT(IN) :: n_nxrg 796 INTEGER(iwp), INTENT(IN) :: n_nyng 797 INTEGER(iwp), INTENT(IN) :: n_nysg 798 INTEGER(iwp), SAVE :: pe_from = 0 799 INTEGER(iwp), INTENT(OUT) :: win 800 801 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 802 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 803 804 INTEGER(iwp), DIMENSION(2) :: buf_shape 805 806 REAL(sp), DIMENSION(:,:), POINTER :: buf 807 REAL(sp), DIMENSION(:,:), POINTER :: p2 808 809 TYPE(C_PTR), SAVE :: base_ptr 810 TYPE(C_PTR), SAVE :: rem_ptr 811 812 813 IF ( this%no_shared_memory_in_this_run ) RETURN 814 ! 815 !-- Allocate shared memory on node rank 0 threads. 816 IF ( this%sh_rank == pe_from ) THEN 817 wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 ) 818 ELSE 819 wsize = 1 820 ENDIF 821 822 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 823 ! unit 824 825 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 826 ! 827 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 828 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 829 ! 830 !-- Convert C- to Fortran-pointer 831 buf_shape(2) = n_nyng - n_nysg + 1 832 buf_shape(1) = n_nxrg - n_nxlg + 1 833 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 834 p2(n_nxlg:, n_nysg:) => buf 835 ! 
836 !-- Allocate shared memory in round robin on all PEs of a node. 837 pe_from = MOD( pe_from, this%sh_npes ) 838 839 END SUBROUTINE sm_allocate_shared_2d_32 515 840 516 841 … … 532 857 INTEGER(iwp), INTENT(IN) :: n_nysg !< 533 858 INTEGER(iwp), SAVE :: pe_from = 0 !< 859 INTEGER(iwp), INTENT(OUT) :: win !< 860 534 861 INTEGER(kind=MPI_ADDRESS_KIND) :: rem_size !< 535 INTEGER(iwp), INTENT(OUT) :: win !<536 862 INTEGER(kind=MPI_ADDRESS_KIND) :: wsize !< 537 863 … … 541 867 INTEGER(iwp), DIMENSION(:,:), POINTER :: p2i !< 542 868 543 TYPE(C_PTR), SAVE:: base_ptr !<544 TYPE(C_PTR), SAVE:: rem_ptr !<869 TYPE(C_PTR), SAVE :: base_ptr !< 870 TYPE(C_PTR), SAVE :: rem_ptr !< 545 871 546 872 … … 577 903 ! Description: 578 904 ! ------------ 579 !> Allocate shared 3d-REAL array on ALL threads580 !--------------------------------------------------------------------------------------------------! 581 SUBROUTINE sm_allocate_shared_3d ( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )905 !> Allocate shared 3d-REAL (64 Bit) array on ALL threads 906 !--------------------------------------------------------------------------------------------------! 907 SUBROUTINE sm_allocate_shared_3d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) 582 908 583 909 IMPLICIT NONE … … 593 919 INTEGER, INTENT(IN) :: d3s !< 594 920 INTEGER, SAVE :: pe_from = 0 !< 921 INTEGER, INTENT(OUT) :: win !< 922 595 923 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 596 INTEGER, INTENT(OUT) :: win !<597 924 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 598 925 599 926 INTEGER, DIMENSION(3) :: buf_shape !< 600 927 601 REAL( wp), DIMENSION(:,:,:), POINTER :: buf !<602 REAL( wp), DIMENSION(:,:,:), POINTER :: p3 !<928 REAL(dp), DIMENSION(:,:,:), POINTER :: buf !< 929 REAL(dp), DIMENSION(:,:,:), POINTER :: p3 !< 603 930 604 931 TYPE(C_PTR), SAVE :: base_ptr !< … … 615 942 ENDIF 616 943 617 wsize = wsize * 8! Please note, size is always in bytes, independently of the displacement944 wsize = wsize * dp ! Please note, size is always in bytes, independently of the displacement 618 945 ! unit 619 946 620 CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )947 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 621 948 ! 622 949 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) … … 633 960 pe_from = MOD( pe_from, this%sh_npes ) 634 961 635 END SUBROUTINE sm_allocate_shared_3d 962 END SUBROUTINE sm_allocate_shared_3d_64 963 964 965 !--------------------------------------------------------------------------------------------------! 966 ! Description: 967 ! ------------ 968 !> Allocate shared 3d-REAL (32 Bit) array on ALL threads 969 !--------------------------------------------------------------------------------------------------! 
970 SUBROUTINE sm_allocate_shared_3d_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) 971 972 IMPLICIT NONE 973 974 CLASS(sm_class), INTENT(inout) :: this 975 976 INTEGER :: disp_unit 977 INTEGER, INTENT(IN) :: d1e 978 INTEGER, INTENT(IN) :: d1s 979 INTEGER, INTENT(IN) :: d2e 980 INTEGER, INTENT(IN) :: d2s 981 INTEGER, INTENT(IN) :: d3e 982 INTEGER, INTENT(IN) :: d3s 983 INTEGER, SAVE :: pe_from = 0 984 INTEGER, INTENT(OUT) :: win 985 986 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 987 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 988 989 INTEGER, DIMENSION(3) :: buf_shape 990 991 REAL(sp), DIMENSION(:,:,:), POINTER :: buf 992 REAL(sp), DIMENSION(:,:,:), POINTER :: p3 993 994 TYPE(C_PTR), SAVE :: base_ptr 995 TYPE(C_PTR), SAVE :: rem_ptr 996 997 998 IF ( this%no_shared_memory_in_this_run ) RETURN 999 ! 1000 !-- Allocate shared memory on node rank 0 threads. 1001 IF ( this%sh_rank == pe_from ) THEN 1002 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) 1003 ELSE 1004 wsize = 1 1005 ENDIF 1006 1007 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 1008 ! unit 1009 1010 CALL MPI_WIN_ALLOCATE_SHARED( wsize, sp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 1011 ! 1012 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 1013 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 1014 ! 1015 !-- Convert C- to Fortran-pointer 1016 buf_shape(3) = d3e - d3s + 1 1017 buf_shape(2) = d2e - d2s + 1 1018 buf_shape(1) = d1e - d1s + 1 1019 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 1020 p3(d1s:,d2s:,d3s:) => buf 1021 ! 1022 !-- Allocate shared memory in round robin on all PEs of a node. 1023 pe_from = MOD( pe_from, this%sh_npes ) 1024 1025 END SUBROUTINE sm_allocate_shared_3d_32 1026 636 1027 #endif 637 1028 … … 694 1085 INTEGER(iwp), INTENT(INOUT) :: win !< 695 1086 696 IF ( this%no_shared_memory_in_this_run .OR. win == -1234567890 ) RETURN 697 ! win is used just to avoid compile errors because of unused arguments 1087 IF ( this%no_shared_memory_in_this_run ) RETURN 698 1088 #if defined( __parallel ) 699 1089 CALL MPI_WIN_FREE( win, ierr ) 700 1090 #endif 1091 win = -1 701 1092 702 1093 END SUBROUTINE sm_free_shared … … 723 1114 END SUBROUTINE sm_node_barrier 724 1115 1116 1117 SUBROUTINE save_grid_into_this_class( this ) 1118 1119 IMPLICIT NONE 1120 1121 CLASS(domain_decomposition_grid_features), INTENT(inout) :: this !< 1122 1123 this%myid = myid !< 1124 this%nnx = nnx !< 1125 this%nny = nny !< 1126 this%nx = nx !< 1127 this%nxl = nxl !< 1128 this%nxr = nxr !< 1129 this%ny = ny !< 1130 this%nyn = nyn !< 1131 this%nys = nys !< 1132 this%numprocs = numprocs !< 1133 this%comm2d = comm2d !< 1134 1135 END SUBROUTINE save_grid_into_this_class 1136 1137 1138 SUBROUTINE activate_grid_from_this_class( this ) 1139 1140 IMPLICIT NONE 1141 1142 CLASS(domain_decomposition_grid_features), INTENT(inout) :: this !< 1143 1144 myid = this%myid !< 1145 nnx = this%nnx !< 1146 nny = this%nny !< 1147 nx = this%nx !< 1148 nxl = this%nxl !< 1149 nxr = this%nxr !< 1150 ny = this%ny !< 1151 nyn = this%nyn !< 1152 nys = this%nys !< 1153 numprocs = this%numprocs !< 1154 comm2d = this%comm2d !< 1155 1156 END SUBROUTINE activate_grid_from_this_class 1157 725 1158 END MODULE shared_memory_io_mod
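All of the sm_allocate_shared_* routines above follow the same three-step MPI-3 pattern: node rank 0 allocates the real window with MPI_WIN_ALLOCATE_SHARED (the other ranks on the node request only a 1-byte dummy), every rank then queries the C address of rank 0's segment with MPI_WIN_SHARED_QUERY, and that C pointer is finally remapped onto a Fortran array pointer with C_F_POINTER. The standalone sketch below shows this pattern for a single 1d 64-bit array on the node-local communicator; the program and variable names are invented for the illustration, and the element size is computed with STORAGE_SIZE instead of being taken from the kind value.

 PROGRAM shared_window_demo

    USE, INTRINSIC ::  ISO_C_BINDING, ONLY:  C_F_POINTER, C_PTR

    USE MPI

    IMPLICIT NONE

    INTEGER, PARAMETER ::  dp = KIND( 1.0D0 )                   !< 64-bit REAL kind
    INTEGER, PARAMETER ::  nbytes = STORAGE_SIZE( 1.0_dp ) / 8  !< bytes per element
    INTEGER, PARAMETER ::  n = 1000                             !< number of elements to share

    INTEGER ::  comm_node   !< node-local communicator
    INTEGER ::  disp_unit   !< displacement unit returned by the query
    INTEGER ::  ierr        !<
    INTEGER ::  node_rank   !< rank within the node
    INTEGER ::  win         !< shared-memory window handle

    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size   !< size of node-rank 0's segment in bytes
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize      !< size of the locally requested segment in bytes

    REAL(dp), DIMENSION(:), POINTER ::  buf       !< Fortran view of the shared segment

    TYPE(C_PTR) ::  base_ptr   !< C address of the locally allocated segment
    TYPE(C_PTR) ::  rem_ptr    !< C address of node-rank 0's segment


    CALL MPI_INIT( ierr )
 !
 !-- Node-local communicator, created the same way as in sm_init_comm / sm_init_part
    CALL MPI_COMM_SPLIT_TYPE( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,           &
                              comm_node, ierr )
    CALL MPI_COMM_RANK( comm_node, node_rank, ierr )
 !
 !-- Only node rank 0 provides real memory; window sizes are always given in bytes
    IF ( node_rank == 0 )  THEN
       wsize = INT( n, MPI_ADDRESS_KIND ) * nbytes
    ELSE
       wsize = 1
    ENDIF
    CALL MPI_WIN_ALLOCATE_SHARED( wsize, nbytes, MPI_INFO_NULL, comm_node, base_ptr, win, ierr )
 !
 !-- Every rank maps node-rank 0's segment into its own address space
    CALL MPI_WIN_SHARED_QUERY( win, 0, rem_size, disp_unit, rem_ptr, ierr )
    CALL C_F_POINTER( rem_ptr, buf, (/ n /) )
 !
 !-- All ranks of the node now access the same physical memory through buf
    IF ( node_rank == 0 )  buf = 0.0_dp
    CALL MPI_BARRIER( comm_node, ierr )

    CALL MPI_WIN_FREE( win, ierr )
    CALL MPI_FINALIZE( ierr )

 END PROGRAM shared_window_demo

Because every rank of a node maps the same physical segment, only one copy of each I/O buffer exists per node, which is the purpose of the shared-memory I/O layer.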
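The save_grid_into_this_class / activate_grid_from_this_class pair at the end of the module exists so that the globally visible decomposition variables (nx, ny, nxl, nxr, ...) can be switched between the main-run grid and the coarser prerun grid while prerun restart data are read in cyclic fill mode. The sketch below illustrates that save/activate bracket with a deliberately reduced grid type and plain module subroutines; it is not the module's actual interface, and the grid sizes are made up.

 MODULE grid_swap_demo

    IMPLICIT NONE

 !
 !-- Stand-ins for the global decomposition variables of the model
    INTEGER ::  nx = -1
    INTEGER ::  ny = -1

    TYPE grid_features
       INTEGER ::  nx
       INTEGER ::  ny
    END TYPE grid_features

    TYPE(grid_features) ::  mainrun_grid
    TYPE(grid_features) ::  prerun_grid

  CONTAINS

     SUBROUTINE save_grid( this )
        TYPE(grid_features), INTENT(OUT) ::  this
 !
 !--    Copy the currently active global grid variables into the given container
        this%nx = nx
        this%ny = ny
     END SUBROUTINE save_grid

     SUBROUTINE activate_grid( this )
        TYPE(grid_features), INTENT(IN) ::  this
 !
 !--    Make the grid stored in the given container the globally active one
        nx = this%nx
        ny = this%ny
     END SUBROUTINE activate_grid

 END MODULE grid_swap_demo


 PROGRAM cyclic_fill_swap_demo

    USE grid_swap_demo

    IMPLICIT NONE

 !
 !-- Main run grid as set up by the model
    nx = 255
    ny = 255
    CALL save_grid( mainrun_grid )
 !
 !-- Grid of the (smaller) prerun as found in the restart file
    nx = 63
    ny = 63
    CALL save_grid( prerun_grid )
 !
 !-- While prerun restart data are read, the prerun grid is the active one ...
    CALL activate_grid( prerun_grid )
    WRITE( *, * )  'reading prerun data on grid ', nx, ny
 !
 !-- ... afterwards the main run grid is restored before the data are used cyclically
    CALL activate_grid( mainrun_grid )
    WRITE( *, * )  'continuing main run on grid ', nx, ny

 END PROGRAM cyclic_fill_swap_demo

Saving both grids once and switching between them by plain assignment keeps the rest of the reading code unaware of which decomposition is currently active.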