Changeset 4893 for palm/trunk/SOURCE/shared_memory_io_mod.f90
Timestamp: Mar 2, 2021 4:39:14 PM
File: 1 edited
Legend: unchanged lines are shown without a prefix, added lines are prefixed with '+', removed lines with '-'.
palm/trunk/SOURCE/shared_memory_io_mod.f90
r4828 r4893 25 25 ! ----------------- 26 26 ! $Id$ 27 ! revised output of surface data via MPI-IO for better performance 28 ! 29 ! 4828 2021-01-05 11:21:41Z Giersch 27 30 ! additions for output of particle time series 28 31 ! … … 39 42 ! unused variable removed 40 43 ! 41 !42 44 ! Additions for cyclic fill mode 43 !44 45 ! 45 46 ! File re-formatted to follow the PALM coding standard … … 47 48 ! 48 49 ! Initial version (Klaus Ketelsen) 49 !50 !51 50 ! 52 51 ! Description: … … 100 99 comm1dy, & 101 100 comm2d, & 101 comm_palm, & 102 102 ierr, & 103 103 myid, & … … 120 120 #endif 121 121 122 USE transpose_indices, & 123 ONLY: nxl_y, nxl_z, nxr_y, nxr_z, nys_x, nys_z, nyn_x, nyn_z, nzb_x, nzb_y, nzt_x, nzt_y 124 122 125 IMPLICIT NONE 123 126 … … 149 152 END TYPE domain_decomposition_grid_features 150 153 154 TYPE, PUBLIC :: sm_remote_array 155 156 TYPE(C_PTR) :: rem_ptr !< 157 INTEGER(iwp) :: d1e !< 158 INTEGER(iwp) :: d1s !< 159 INTEGER(iwp) :: d2e !< 160 INTEGER(iwp) :: d2s !< 161 INTEGER(iwp) :: d3e !< 162 INTEGER(iwp) :: d3s !< 163 INTEGER(iwp) :: d4e !< 164 INTEGER(iwp) :: d4s !< 165 166 END TYPE sm_remote_array 167 151 168 ! 152 169 !-- Class definition for shared memory instances. … … 164 181 INTEGER(iwp), PUBLIC :: sh_rank !< 165 182 166 LOGICAL, PUBLIC :: iam_io_pe = .TRUE. !< This PE is an IO-PE167 183 ! 168 184 !-- Variables for the I/O virtual grid 169 INTEGER(iwp), PUBLIC :: comm_io !< Communicator for all IO processes185 INTEGER(iwp), PUBLIC :: comm_io !< communicator for all IO processes 170 186 INTEGER(iwp), PUBLIC :: io_npes !< 171 187 INTEGER(iwp), PUBLIC :: io_rank !< 172 188 ! 173 189 !-- Variables for the node local communicator 174 INTEGER(iwp) :: comm_node !< Communicator for all processes of current node190 INTEGER(iwp) :: comm_node !< communicator for all processes of current node 175 191 INTEGER(iwp) :: io_pe_global_rank !< 176 192 INTEGER(iwp) :: n_npes !< 177 193 INTEGER(iwp) :: n_rank !< 178 194 179 TYPE(domain_decomposition_grid_features), PUBLIC :: io_grid !< io grid features, depending on reading from prerun or restart run 180 195 LOGICAL, PUBLIC :: is_root_pe !< 196 LOGICAL, PUBLIC :: iam_io_pe = .TRUE. 
!< this PE is an IO-PE 197 198 TYPE(domain_decomposition_grid_features), PUBLIC :: io_grid !< io grid features, depending on reading from prerun or main run 181 199 182 200 CONTAINS … … 191 209 PROCEDURE, PASS(this), PUBLIC :: sm_node_barrier 192 210 #if defined( __parallel ) 211 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_32 193 212 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_64 194 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_32195 213 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1di 214 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_32 196 215 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_64 197 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_32198 216 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2di 217 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_32 199 218 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_64 200 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_32 219 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_4d_32 220 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_4d_64 201 221 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3di_32 202 222 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3di_64 223 PROCEDURE, PASS(this), PUBLIC :: sm_all_allocate_shared_3d_64 203 224 204 225 GENERIC, PUBLIC :: sm_allocate_shared => & 205 sm_allocate_shared_1d_64, sm_allocate_shared_1d_32, & 206 sm_allocate_shared_2d_64, sm_allocate_shared_2d_32, & 207 sm_allocate_shared_2di, sm_allocate_shared_3d_64, & 208 sm_allocate_shared_3d_32, sm_allocate_shared_1di, & 209 sm_allocate_shared_3di_32, sm_allocate_shared_3di_64 226 sm_allocate_shared_1d_64, sm_allocate_shared_1d_32, & 227 sm_allocate_shared_2d_64, sm_allocate_shared_2d_32, & 228 sm_allocate_shared_2di, sm_allocate_shared_3d_64, & 229 sm_allocate_shared_4d_64, sm_allocate_shared_4d_32, & 230 sm_allocate_shared_3d_32, sm_allocate_shared_1di, & 231 sm_allocate_shared_3di_32, sm_allocate_shared_3di_64 232 233 GENERIC, PUBLIC :: sm_all_allocate_shared => sm_all_allocate_shared_3d_64 210 234 #endif 211 235 END TYPE sm_class … … 226 250 227 251 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 228 INTEGER , INTENT(IN), OPTIONAL:: comm_input !< main model communicator (comm2d) can optional be set252 INTEGER(iwp), INTENT(IN), OPTIONAL :: comm_input !< main model communicator (comm2d) can optional be set 229 253 230 254 #if defined( __parallel ) 231 INTEGER :: color 232 INTEGER :: max_n _npes!< maximum number of PEs/node255 INTEGER :: color !< 256 INTEGER :: max_npes_per_node !< maximum number of PEs/node 233 257 #endif 234 258 … … 237 261 this%nr_io_pe_per_node = 2 238 262 263 #if defined( __parallel ) 239 264 IF ( PRESENT( comm_input ) ) THEN 240 265 this%comm_model = comm_input … … 248 273 IF ( this%no_shared_memory_in_this_run ) THEN 249 274 this%iam_io_pe = .TRUE. 275 this%sh_rank = 0 276 this%sh_npes = 1 250 277 RETURN 251 278 ENDIF 252 279 253 #if defined( __parallel ) 254 ! 255 !-- Determine, how many MPI threads are running on a node 280 ! 281 !-- Determine, how many PEs are running on a node. 256 282 this%iam_io_pe = .FALSE. 
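The new 4d variants and the sm_all_allocate_shared generic extend the interface above. A minimal, hedged usage sketch of the extended generic (instance, array, and window names are illustrative, not part of the changeset; resolution to sm_allocate_shared_4d_64 follows from the argument rank and kind):

    TYPE(sm_class)                        ::  sm_io    ! hypothetical instance
    REAL(dp), DIMENSION(:,:,:,:), POINTER ::  buf_4d   ! illustrative 4d field
    INTEGER(iwp)                          ::  win_4d   ! MPI window handle

    CALL sm_io%sm_init_comm( comm2d )
!-- Eight bounds (d1s, d1e, ..., d4s, d4e) plus the window handle:
    CALL sm_io%sm_allocate_shared( buf_4d, 1, 2, 0, 10, nys, nyn, nxl, nxr, win_4d )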
@@ -227 +251 @@
    CLASS(sm_class), INTENT(INOUT) ::  this  !< pointer to access internal variables of this call
-   INTEGER, INTENT(IN), OPTIONAL ::  comm_input  !< main model communicator (comm2d) can optional be set
+   INTEGER(iwp), INTENT(IN), OPTIONAL ::  comm_input  !< main model communicator (comm2d) can optional be set
 
 #if defined( __parallel )
-   INTEGER ::  color
-   INTEGER ::  max_n_npes  !< maximum number of PEs/node
+   INTEGER ::  color              !<
+   INTEGER ::  max_npes_per_node  !< maximum number of PEs/node
 #endif
 
@@ -237 +261 @@
    this%nr_io_pe_per_node = 2
 
+#if defined( __parallel )
    IF ( PRESENT( comm_input ) )  THEN
       this%comm_model = comm_input
@@ -248 +273 @@
    IF ( this%no_shared_memory_in_this_run )  THEN
       this%iam_io_pe = .TRUE.
+      this%sh_rank = 0
+      this%sh_npes = 1
       RETURN
    ENDIF
 
-#if defined( __parallel )
-!
-!-- Determine, how many MPI threads are running on a node
+!
+!-- Determine, how many PEs are running on a node.
    this%iam_io_pe = .FALSE.
    CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &
@@ -260 +286 @@
    CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr )
 
-   CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr )
+   CALL MPI_ALLREDUCE( this%n_npes, max_npes_per_node, 1, MPI_INTEGER, MPI_MAX, this%comm_model, &
+                       ierr )
 !
 !-- Decide, if the configuration can run with shared-memory IO
-   IF ( max_n_npes > 64 )  THEN
+   IF ( max_npes_per_node > 64 )  THEN
 !
 !--    Special configuration on the HLRN-IV system with 4 shared memory blocks/node
       this%nr_io_pe_per_node = 4
 
-   ELSEIF ( max_n_npes <= 32 )  THEN
-!
-!--    No shared memory IO with less than 32 threads/node
+   ELSEIF ( max_npes_per_node <= 3 )  THEN
+!
+!--    No shared memory IO with less than 3 MPI tasks/node
       this%no_shared_memory_in_this_run = .TRUE.
       this%iam_io_pe = .TRUE.
@@ -277 +304 @@
 
 !
-!-- No shared memory IO with small setups
-   IF ( nx < 24  .OR.  ny < 24 )  THEN
+!-- No shared memory IO with small setups.
+   IF ( nx < 16  .OR.  ny < 16 )  THEN
       this%no_shared_memory_in_this_run = .TRUE.
       this%iam_io_pe = .TRUE.
@@ -299 +326 @@
 !
 !-- Setup the communicator across the nodes depending on the shared memory rank.
-!-- All threads with shared memory rank 0 will be I/O threads.
+!-- All PEs with shared memory rank 0 will be I/O PEs.
    color = this%sh_rank
    CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr )
@@ -316 +343 @@
    ENDIF
    CALL MPI_BCAST( this%io_pe_global_rank, 1, MPI_INTEGER, 0, this%comm_shared, ierr )
-
 #else
-   this%iam_io_pe = .TRUE.
+   this%iam_io_pe  = .TRUE.
+   this%comm_model = comm2d
+   this%sh_rank    = 0
+   this%sh_npes    = 1
+   this%no_shared_memory_in_this_run = .TRUE.
 #endif
 
-! write(9,'(a,8i7)') ' end of sm_init_comm ',this%sh_rank,this%sh_npes,this%io_rank,this%io_npes,this%io_pe_global_rank
-! write(9,*) 'This process is IO Process ',this%iam_io_pe
-
 #if defined( __parallel )
@@ -362 +389 @@
    CALL MPI_ALLREDUCE( local_dim_s, local_dim_r, SIZE( local_dim_s ), MPI_INTEGER, MPI_SUM, &
                        this%comm_node, ierr )
-   sh_group_size = ( max_n_npes + this%nr_io_pe_per_node - 1 ) / this%nr_io_pe_per_node
+   sh_group_size = ( max_npes_per_node + this%nr_io_pe_per_node - 1 ) / this%nr_io_pe_per_node
 
    pe = 0
@@ -417 +444 @@
 END SUBROUTINE sm_init_comm
 
-
 !
 !-- Initializing setup for output of particle time series.
@@ -428 +454 @@
 
 #if defined( __parallel )
    INTEGER(iwp) ::  color  !<
    INTEGER(iwp) ::  ierr   !<
-   INTEGER(iwp) ::  max_n_npes         !< maximum number of PEs/node
+   INTEGER(iwp) ::  max_npes_per_node  !< maximum number of PEs/node
 #endif
 
@@ -451 +477 @@
 #if defined( __parallel )
 !
-!-- Determine, how many MPI threads are running on a node
+!-- Determine, how many PEs are running on a node.
    this%iam_io_pe = .FALSE.
    CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &
@@ -458 +484 @@
    CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr )
 
-   CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr )
-
+   CALL MPI_ALLREDUCE( this%n_npes, max_npes_per_node, 1, MPI_INTEGER, MPI_MAX, this%comm_model, &
+                       ierr )
 !
 !-- TODO: better explanation
@@ -465 +491 @@
 !-- even better to use the complete node for MPI shared memory (this%nr_io_pe_per_node = 1).
 !-  In the latter case, the access to the MPI shared memory buffer is slower, the number of
-!-- particles to move between threads will be much smaller.
-   IF ( max_n_npes > 64 )  THEN
+!-- particles to move between PEs will be much smaller.
+   IF ( max_npes_per_node > 64 )  THEN
 !
 !--    Special configuration on the HLRN-IV system with 4 shared memory blocks/node
@@ -526 +552 @@
 !
 !-- Setup the communicator across the nodes depending on the shared memory rank.
-!-- All threads with shared memory rank 0 will be I/O threads.
+!-- All PEs with shared memory rank 0 will be I/O PEs.
    color = this%sh_rank
    CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr )
@@ -573 +599 @@
 ! Description:
 ! ------------
-!> Allocate shared 1d-REAL (64 Bit) array on ALL threads
+!> Allocate shared 1d-REAL (64 bit) array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_1d_64( this, p1, d1, d2, win )
@@ -590 +616 @@
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize
 
-   INTEGER, DIMENSION(1) ::  buf_shape
+   INTEGER(iwp), DIMENSION(1) ::  buf_shape
 
    REAL(dp), DIMENSION(:), POINTER ::  buf
@@ -601 +627 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = d2 - d1 + 1
@@ -629 +655 @@
 ! Description:
 ! ------------
-!> Allocate shared 1d-REAL (32 Bit) array on ALL threads
+!> Allocate shared 1d-REAL (32 bit) array on PE 0 and pass address to all PEs
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_1d_32( this, p1, d1, d2, win )
@@ -646 +672 @@
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize
 
-   INTEGER, DIMENSION(1) ::  buf_shape
+   INTEGER(iwp), DIMENSION(1) ::  buf_shape
 
    REAL(sp), DIMENSION(:), POINTER ::  buf
@@ -657 +683 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = d2 - d1 + 1
@@ -685 +711 @@
 ! Description:
 ! ------------
-!> Allocate shared 1d-INTEGER array on ALL threads
+!> Allocate shared 1d-INTEGER array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_1di( this, p1, d1, d2, win )
@@ -702 +728 @@
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize
 
-   INTEGER, DIMENSION(1) ::  buf_shape
+   INTEGER(iwp), DIMENSION(1) ::  buf_shape
 
    INTEGER(iwp), DIMENSION(:), POINTER ::  buf
@@ -713 +739 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = d2 - d1 + 1
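All sm_allocate_shared_* variants follow the same MPI-3 shared-memory pattern: one PE of the node-local communicator provides the actual window size, every PE queries the base address of that segment and maps it onto a Fortran pointer. A self-contained, hedged sketch of that pattern outside of PALM (the size of 100 elements and the REAL kind are arbitrary here):

    PROGRAM shm_pattern
       USE MPI
       USE, INTRINSIC ::  ISO_C_BINDING
       IMPLICIT NONE
       INTEGER ::  comm_node, disp_unit, ierr, sh_rank, win
       INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size, wsize
       TYPE(C_PTR) ::  base_ptr, rem_ptr
       REAL(KIND=C_DOUBLE), DIMENSION(:), POINTER ::  buf

       CALL MPI_INIT( ierr )
    !-- Group the PEs of one node into a shared-memory communicator.
       CALL MPI_COMM_SPLIT_TYPE( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &
                                 comm_node, ierr )
       CALL MPI_COMM_RANK( comm_node, sh_rank, ierr )
    !-- Only rank 0 contributes memory; the window size is given in bytes.
       wsize = 0
       IF ( sh_rank == 0 )  wsize = 100 * 8
       CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, comm_node, base_ptr, win, ierr )
    !-- Every rank maps rank 0's segment onto a Fortran pointer.
       CALL MPI_WIN_SHARED_QUERY( win, 0, rem_size, disp_unit, rem_ptr, ierr )
       CALL C_F_POINTER( rem_ptr, buf, [ 100 ] )
       IF ( sh_rank == 0 )  buf = 0.0_C_DOUBLE
       CALL MPI_WIN_FREE( win, ierr )
       CALL MPI_FINALIZE( ierr )
    END PROGRAM shm_pattern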
@@ -741 +767 @@
 ! Description:
 ! ------------
-!> Allocate shared 2d-REAL array on ALL threads (64 Bit)
+!> Allocate shared 2d-REAL array (64 bit) on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_2d_64( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win )
@@ -771 +797 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 )
@@ -801 +827 @@
 ! Description:
 ! ------------
-!> Allocate shared 2d-REAL (32 Bit) array on ALL threads
+!> Allocate shared 2d-REAL (32 Bit) array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_2d_32( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win )
@@ -831 +857 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 )
@@ -861 +887 @@
 ! Description:
 ! ------------
-!> Allocate shared 2d-INTEGER array on ALL threads
+!> Allocate shared 2d-INTEGER array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_2di( this, p2i, n_nxlg, n_nxrg, n_nysg, n_nyng, win )
@@ -891 +917 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 )
@@ -921 +947 @@
 ! Description:
 ! ------------
-!> Allocate shared 3d-REAL (64 Bit) array on ALL threads
+!> Allocate shared 3d-REAL (64 bit) array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_3d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )
@@ -929 +955 @@
    CLASS(sm_class), INTENT(inout) ::  this  !<
 
-   INTEGER ::  disp_unit
-   INTEGER, INTENT(IN) ::  d1e
-   INTEGER, INTENT(IN) ::  d1s
-   INTEGER, INTENT(IN) ::  d2e
-   INTEGER, INTENT(IN) ::  d2s
-   INTEGER, INTENT(IN) ::  d3e
-   INTEGER, INTENT(IN) ::  d3s
-   INTEGER, SAVE ::  pe_from = 0
-   INTEGER, INTENT(OUT) ::  win
+   INTEGER(iwp) ::  disp_unit          !<
+   INTEGER(iwp), INTENT(IN) ::  d1e    !<
+   INTEGER(iwp), INTENT(IN) ::  d1s    !<
+   INTEGER(iwp), INTENT(IN) ::  d2e    !<
+   INTEGER(iwp), INTENT(IN) ::  d2s    !<
+   INTEGER(iwp), INTENT(IN) ::  d3e    !<
+   INTEGER(iwp), INTENT(IN) ::  d3s    !<
+   INTEGER(iwp), SAVE ::  pe_from = 0  !<
+   INTEGER(iwp), INTENT(OUT) ::  win   !<
 
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size  !<
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize     !<
 
-   INTEGER, DIMENSION(3) ::  buf_shape
+   INTEGER(iwp), DIMENSION(3) ::  buf_shape  !<
 
    REAL(dp), DIMENSION(:,:,:), POINTER ::  buf  !<
@@ -953 +979 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
@@ -984 +1010 @@
 ! Description:
 ! ------------
-!> Allocate shared 3d-REAL (32 Bit) array on ALL threads
+!> Allocate shared 3d-REAL (32 bit) array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_3d_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )
@@ -992 +1018 @@
    CLASS(sm_class), INTENT(inout) ::  this
 
-   INTEGER ::  disp_unit
-   INTEGER, INTENT(IN) ::  d1e
-   INTEGER, INTENT(IN) ::  d1s
-   INTEGER, INTENT(IN) ::  d2e
-   INTEGER, INTENT(IN) ::  d2s
-   INTEGER, INTENT(IN) ::  d3e
-   INTEGER, INTENT(IN) ::  d3s
-   INTEGER, SAVE ::  pe_from = 0
-   INTEGER, INTENT(OUT) ::  win
+   INTEGER(iwp) ::  disp_unit
+   INTEGER(iwp), INTENT(IN) ::  d1e
+   INTEGER(iwp), INTENT(IN) ::  d1s
+   INTEGER(iwp), INTENT(IN) ::  d2e
+   INTEGER(iwp), INTENT(IN) ::  d2s
+   INTEGER(iwp), INTENT(IN) ::  d3e
+   INTEGER(iwp), INTENT(IN) ::  d3s
+   INTEGER(iwp), SAVE ::  pe_from = 0
+   INTEGER(iwp), INTENT(OUT) ::  win
 
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize
 
-   INTEGER, DIMENSION(3) ::  buf_shape
+   INTEGER(iwp), DIMENSION(3) ::  buf_shape
 
    REAL(sp), DIMENSION(:,:,:), POINTER ::  buf
@@ -1016 +1042 @@
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
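A note on the p3(d1s:,d2s:,d3s:) => buf statements used throughout these routines: C_F_POINTER always yields lower bounds of 1, so the code relies on Fortran 2003 pointer bounds remapping to restore the caller's index ranges. A short sketch of the idiom in isolation (illustrative names; bounds come from the caller):

    REAL(dp), DIMENSION(:,:,:), POINTER ::  buf, p3
    CALL C_F_POINTER( rem_ptr, buf, [ d1e-d1s+1, d2e-d2s+1, d3e-d3s+1 ] )
!-- After the remapping, p3(d1s:d1e,d2s:d2e,d3s:d3e) aliases buf(1:,1:,1:).
    p3(d1s:,d2s:,d3s:) => buf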
@@ -1047 +1073 @@
 ! Description:
 ! ------------
-!> Allocate shared 3d-REAL (32 bit) array on ALL threads
+!> Allocate shared 4d-REAL (64 bit) array on PE 0 and pass address to all PEs.
+!--------------------------------------------------------------------------------------------------!
+SUBROUTINE sm_allocate_shared_4d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, d4s, d4e, win )
+
+   IMPLICIT NONE
+
+   CLASS(sm_class), INTENT(inout) ::  this  !<
+
+   INTEGER ::  disp_unit               !<
+   INTEGER(iwp), INTENT(IN) ::  d1e    !<
+   INTEGER(iwp), INTENT(IN) ::  d1s    !<
+   INTEGER(iwp), INTENT(IN) ::  d2e    !<
+   INTEGER(iwp), INTENT(IN) ::  d2s    !<
+   INTEGER(iwp), INTENT(IN) ::  d3e    !<
+   INTEGER(iwp), INTENT(IN) ::  d3s    !<
+   INTEGER(iwp), INTENT(IN) ::  d4e    !<
+   INTEGER(iwp), INTENT(IN) ::  d4s    !<
+   INTEGER(iwp), SAVE ::  pe_from = 0  !<
+   INTEGER(iwp), INTENT(OUT) ::  win   !<
+
+   INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size  !<
+   INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize     !<
+
+   INTEGER(iwp), DIMENSION(4) ::  buf_shape  !<
+
+   REAL(dp), DIMENSION(:,:,:,:), POINTER ::  buf  !<
+   REAL(dp), DIMENSION(:,:,:,:), POINTER ::  p3   !<
+
+   TYPE(C_PTR), SAVE ::  base_ptr  !<
+   TYPE(C_PTR), SAVE ::  rem_ptr   !<
+
+
+   IF ( this%no_shared_memory_in_this_run )  RETURN
+!
+!-- Allocate shared memory on node rank 0 PEs.
+   IF ( this%sh_rank == pe_from )  THEN
+      wsize = ( d4e - d4s + 1 ) * ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
+   ELSE
+      wsize = 1
+   ENDIF
+
+   wsize = wsize * dp  ! Please note, size is always in bytes, independently of the displacement
+                       ! unit
+
+   CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
+!
+!-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
+   CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr )
+!
+!-- Convert C- to Fortran-pointer
+   buf_shape(4) = d4e - d4s + 1
+   buf_shape(3) = d3e - d3s + 1
+   buf_shape(2) = d2e - d2s + 1
+   buf_shape(1) = d1e - d1s + 1
+   CALL C_F_POINTER( rem_ptr, buf, buf_shape )
+   p3(d1s:,d2s:,d3s:,d4s:) => buf
+!
+!-- Allocate shared memory in round robin on all PEs of a node.
+   pe_from = MOD( pe_from, this%sh_npes )
+
+END SUBROUTINE sm_allocate_shared_4d_64
+
+
+!--------------------------------------------------------------------------------------------------!
+! Description:
+! ------------
+!> Allocate shared 4d-REAL (32 bit) array on PE 0 and pass address to all PEs.
+!--------------------------------------------------------------------------------------------------!
+SUBROUTINE sm_allocate_shared_4d_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, d4s, d4e, win )
+
+   IMPLICIT NONE
+
+   CLASS(sm_class), INTENT(inout) ::  this  !<
+
+   INTEGER ::  disp_unit               !<
+   INTEGER(iwp), INTENT(IN) ::  d1e    !<
+   INTEGER(iwp), INTENT(IN) ::  d1s    !<
+   INTEGER(iwp), INTENT(IN) ::  d2e    !<
+   INTEGER(iwp), INTENT(IN) ::  d2s    !<
+   INTEGER(iwp), INTENT(IN) ::  d3e    !<
+   INTEGER(iwp), INTENT(IN) ::  d3s    !<
+   INTEGER(iwp), INTENT(IN) ::  d4e    !<
+   INTEGER(iwp), INTENT(IN) ::  d4s    !<
+   INTEGER(iwp), SAVE ::  pe_from = 0  !<
+   INTEGER(iwp), INTENT(OUT) ::  win   !<
+
+   INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size  !<
+   INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize     !<
+
+   INTEGER(iwp), DIMENSION(4) ::  buf_shape  !<
+
+   REAL(sp), DIMENSION(:,:,:,:), POINTER ::  buf  !<
+   REAL(sp), DIMENSION(:,:,:,:), POINTER ::  p3   !<
+
+   TYPE(C_PTR), SAVE ::  base_ptr  !<
+   TYPE(C_PTR), SAVE ::  rem_ptr   !<
+
+
+   IF ( this%no_shared_memory_in_this_run )  RETURN
+!
+!-- Allocate shared memory on node rank 0 PEs.
+   IF ( this%sh_rank == pe_from )  THEN
+      wsize = ( d4e - d4s + 1 ) * ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
+   ELSE
+      wsize = 1
+   ENDIF
+
+   wsize = wsize * sp  ! Please note, size is always in bytes, independently of the displacement
+                       ! unit
+
+   CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
+!
+!-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
+   CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr )
+!
+!-- Convert C- to Fortran-pointer
+   buf_shape(4) = d4e - d4s + 1
+   buf_shape(3) = d3e - d3s + 1
+   buf_shape(2) = d2e - d2s + 1
+   buf_shape(1) = d1e - d1s + 1
+   CALL C_F_POINTER( rem_ptr, buf, buf_shape )
+   p3(d1s:,d2s:,d3s:,d4s:) => buf
+!
+!-- Allocate shared memory in round robin on all PEs of a node.
+   pe_from = MOD( pe_from, this%sh_npes )
+
+END SUBROUTINE sm_allocate_shared_4d_32
+
+
+!--------------------------------------------------------------------------------------------------!
+! Description:
+! ------------
+!> Allocate shared 3d-INTEGER (32 bit) array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_3di_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )
 
    IMPLICIT NONE
 
    CLASS(sm_class), INTENT(inout) ::  this
 
    INTEGER ::  disp_unit
-   INTEGER, INTENT(IN) ::  d1e
-   INTEGER, INTENT(IN) ::  d1s
-   INTEGER, INTENT(IN) ::  d2e
-   INTEGER, INTENT(IN) ::  d2s
-   INTEGER, INTENT(IN) ::  d3e
-   INTEGER, INTENT(IN) ::  d3s
-   INTEGER, SAVE ::  pe_from = 0
-   INTEGER, INTENT(OUT) ::  win
+   INTEGER(iwp), INTENT(IN) ::  d1e
+   INTEGER(iwp), INTENT(IN) ::  d1s
+   INTEGER(iwp), INTENT(IN) ::  d2e
+   INTEGER(iwp), INTENT(IN) ::  d2s
+   INTEGER(iwp), INTENT(IN) ::  d3e
+   INTEGER(iwp), INTENT(IN) ::  d3s
+   INTEGER(iwp), SAVE ::  pe_from = 0
+   INTEGER(iwp), INTENT(OUT) ::  win
 
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize
 
-   INTEGER, DIMENSION(3) ::  buf_shape
+   INTEGER(iwp), DIMENSION(3) ::  buf_shape
 
    INTEGER(isp), DIMENSION(:,:,:), POINTER ::  buf
   INTEGER(isp), DIMENSION(:,:,:), POINTER ::  p3
 
    TYPE(C_PTR), SAVE ::  base_ptr
    TYPE(C_PTR), SAVE ::  rem_ptr
 
 
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
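A side note on the wsize = wsize * dp and wsize = wsize * sp lines in the routines above: they yield a size in bytes only because the PALM kind constants happen to carry the numeric values 8 and 4 on the supported compilers. A kind-agnostic alternative (a sketch, not part of this changeset) would be:

!-- STORAGE_SIZE (Fortran 2008) returns the size of one element in bits.
    wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
    wsize = wsize * STORAGE_SIZE( 1.0_dp ) / 8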
@@ -1110 +1268 @@
 ! Description:
 ! ------------
-!> Allocate shared 3d-REAL (64 bit) array on ALL threads
+!> Allocate shared 3d-INTEGER (64 bit) array on PE 0 and pass address to all PEs.
 !--------------------------------------------------------------------------------------------------!
 SUBROUTINE sm_allocate_shared_3di_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )
 
    IMPLICIT NONE
 
    CLASS(sm_class), INTENT(inout) ::  this  !<
 
    INTEGER ::  disp_unit               !<
-   INTEGER, INTENT(IN) ::  d1e    !<
-   INTEGER, INTENT(IN) ::  d1s    !<
-   INTEGER, INTENT(IN) ::  d2e    !<
-   INTEGER, INTENT(IN) ::  d2s    !<
-   INTEGER, INTENT(IN) ::  d3e    !<
-   INTEGER, INTENT(IN) ::  d3s    !<
-   INTEGER, SAVE ::  pe_from = 0  !<
-   INTEGER, INTENT(OUT) ::  win   !<
+   INTEGER(iwp), INTENT(IN) ::  d1e    !<
+   INTEGER(iwp), INTENT(IN) ::  d1s    !<
+   INTEGER(iwp), INTENT(IN) ::  d2e    !<
+   INTEGER(iwp), INTENT(IN) ::  d2s    !<
+   INTEGER(iwp), INTENT(IN) ::  d3e    !<
+   INTEGER(iwp), INTENT(IN) ::  d3s    !<
+   INTEGER(iwp), SAVE ::  pe_from = 0  !<
+   INTEGER(iwp), INTENT(OUT) ::  win   !<
 
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size  !<
    INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize     !<
 
-   INTEGER, DIMENSION(3) ::  buf_shape  !<
+   INTEGER(iwp), DIMENSION(3) ::  buf_shape  !<
 
    INTEGER(idp), DIMENSION(:,:,:), POINTER ::  buf  !<
    INTEGER(idp), DIMENSION(:,:,:), POINTER ::  p3   !<
 
    TYPE(C_PTR), SAVE ::  base_ptr  !<
    TYPE(C_PTR), SAVE ::  rem_ptr   !<
 
 
    IF ( this%no_shared_memory_in_this_run )  RETURN
 !
-!-- Allocate shared memory on node rank 0 threads.
+!-- Allocate shared memory on node rank 0 PEs.
    IF ( this%sh_rank == pe_from )  THEN
       wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
@@ -1169 +1327 @@
 END SUBROUTINE sm_allocate_shared_3di_64
 
+
+!--------------------------------------------------------------------------------------------------!
+! Description:
+! ------------
+!> Allocate shared 3d-REAL (64 Bit) array on ALL PEs.
+!>
+!> Every PE allocates the local part of a node-shared array.
+!> The C-Pointer of this array and the local limits are broadcasted to all PEs of the node
+!> The information is store in an array of type sm_remote_array and can be retrieved
+!> by sm_remote_array to access remote data.
+!--------------------------------------------------------------------------------------------------!
+SUBROUTINE sm_all_allocate_shared_3d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, remote_arrays, win )
+
+   IMPLICIT NONE
+
+   CLASS(sm_class), INTENT(inout) ::  this     !< class pointer
+   REAL(dp), DIMENSION(:,:,:), POINTER ::  p3  !< return local array pointer
+
+   INTEGER(iwp), INTENT(IN) ::  d1e   !< end index dimension 1
+   INTEGER(iwp), INTENT(IN) ::  d1s   !< start index dimension 1
+   INTEGER(iwp), INTENT(IN) ::  d2e   !< end index dimension 2
+   INTEGER(iwp), INTENT(IN) ::  d2s   !< start index dimension 2
+   INTEGER(iwp), INTENT(IN) ::  d3e   !< end index dimension 3
+   INTEGER(iwp), INTENT(IN) ::  d3s   !< start index dimension 3
+   INTEGER(iwp), INTENT(OUT) ::  win  !< MPI Window
+
+   INTEGER(iwp), DIMENSION(3) ::  buf_shape  !<
+   INTEGER(iwp) ::  disp_unit                !<
+   INTEGER(iwp) ::  i                        !<
+   INTEGER(iwp), SAVE ::  pe_from = 0        !<
+
+   INTEGER(KIND=MPI_ADDRESS_KIND) ::  rem_size  !<
+   INTEGER(KIND=MPI_ADDRESS_KIND) ::  wsize     !<
+
+   REAL(dp), DIMENSION(:,:,:), POINTER ::  buf  !<
+
+   TYPE(sm_remote_array), INTENT(INOUT), DIMENSION(0:this%sh_npes-1) ::  remote_arrays  !< info about all remote arrays
+
+   TYPE(C_PTR), SAVE ::  base_ptr  !<
+
+   INTEGER(iwp), DIMENSION(6,0:this%sh_npes-1) ::  all_indices_s
+   INTEGER(iwp), DIMENSION(6,0:this%sh_npes-1) ::  all_indices
+
+
+   IF ( this%no_shared_memory_in_this_run )  RETURN
+
+   all_indices_s = 0
+
+   wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 )
+
+   wsize = wsize * dp  ! Please note, size is always in bytes, independently of the displacement unit
+
+   CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )
+!
+!-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from)
+   all_indices_s(1,this%sh_rank) = d1s
+   all_indices_s(2,this%sh_rank) = d1e
+   all_indices_s(3,this%sh_rank) = d2s
+   all_indices_s(4,this%sh_rank) = d2e
+   all_indices_s(5,this%sh_rank) = d3s
+   all_indices_s(6,this%sh_rank) = d3e
+
+   CALL MPI_ALLREDUCE( all_indices_s, all_indices, SIZE( all_indices_s ), MPI_INTEGER, MPI_SUM, &
+                       this%comm_shared, ierr )
+
+   DO  i = 0, this%sh_npes-1
+      CALL MPI_WIN_SHARED_QUERY( win, i, rem_size, disp_unit, remote_arrays(i)%rem_ptr, ierr )
+      remote_arrays(i)%d1s = all_indices(1,i)
+      remote_arrays(i)%d1e = all_indices(2,i)
+      remote_arrays(i)%d2s = all_indices(3,i)
+      remote_arrays(i)%d2e = all_indices(4,i)
+      remote_arrays(i)%d3s = all_indices(5,i)
+      remote_arrays(i)%d3e = all_indices(6,i)
+   END DO
+
+!
+!-- Convert C- to Fortran-pointer
+   buf_shape(3) = d3e - d3s + 1
+   buf_shape(2) = d2e - d2s + 1
+   buf_shape(1) = d1e - d1s + 1
+   CALL C_F_POINTER( remote_arrays(this%sh_rank)%rem_ptr, buf, buf_shape )
+   p3(d1s:,d2s:,d3s:) => buf
+!
+!-- Allocate shared memory in round robin on all PEs of a node.
+   pe_from = MOD( pe_from, this%sh_npes )
+
+END SUBROUTINE sm_all_allocate_shared_3d_64
 #endif
@@ -1243 +1489 @@
 !> ...
 !--------------------------------------------------------------------------------------------------!
-SUBROUTINE sm_node_barrier( this )
+SUBROUTINE sm_node_barrier( this, win )
 
    IMPLICIT NONE
 
+   INTEGER(iwp), OPTIONAL ::  win  !<
+
    CLASS(sm_class), INTENT(inout) ::  this  !<
@@ -1254 +1502 @@
 #if defined( __parallel )
    CALL MPI_BARRIER( this%comm_shared, ierr )
+   IF ( PRESENT( win ) )  THEN
+      CALL MPI_WIN_FENCE( 0, win, ierr )
+   ENDIF
 #endif
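A hedged usage sketch for the new all-PE allocation (names are illustrative; assumes an initialized instance sm_io and the usual ISO_C_BINDING imports): every PE allocates its local part, the returned descriptors expose the segments of all node-local PEs, and the new optional win argument of sm_node_barrier fences the window before remote reads:

    TYPE(sm_remote_array), DIMENSION(:), ALLOCATABLE ::  remote
    REAL(dp), DIMENSION(:,:,:), POINTER ::  part, nbuf
    INTEGER(iwp) ::  n, win

    ALLOCATE( remote(0:sm_io%sh_npes-1) )
    CALL sm_io%sm_all_allocate_shared( part, 1, 10, nys, nyn, nxl, nxr, remote, win )
    part = 0.0_dp
!-- Barrier plus MPI_WIN_FENCE before accessing another PE's segment.
    CALL sm_io%sm_node_barrier( win )
!-- Map the segment of the right node-local neighbour and read it directly.
    n = MOD( sm_io%sh_rank + 1, sm_io%sh_npes )
    CALL C_F_POINTER( remote(n)%rem_ptr, nbuf, [ remote(n)%d1e - remote(n)%d1s + 1, &
                                                 remote(n)%d2e - remote(n)%d2s + 1, &
                                                 remote(n)%d3e - remote(n)%d3s + 1 ] )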