Changeset 4617 for palm/trunk/SOURCE/shared_memory_io_mod.f90
- Timestamp:
- Jul 22, 2020 9:48:50 AM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/shared_memory_io_mod.f90
r4591 r4617 25 25 ! $Id$ 26 26 ! 27 ! Additions for cyclic fill mode 28 ! 29 ! 27 30 ! File re-formatted to follow the PALM coding standard 28 31 ! 29 32 ! 30 !31 33 ! Initial version (Klaus Ketelsen) 32 34 ! … … 76 78 77 79 USE kinds, & 78 ONLY: iwp, & 80 ONLY: dp, & 81 iwp, & 82 sp, & 79 83 wp 80 81 82 USE transpose_indices, &83 ONLY: nxl_z, &84 nxr_z, &85 nyn_x, &86 nyn_z, &87 nys_x, &88 nys_z89 90 91 84 92 85 USE pegrid, & … … 121 114 122 115 ! 123 !-- Type to store grid information 124 TYPE, PUBLIC :: local_boundaries !< 125 126 INTEGER(iwp) :: nnx !< 127 INTEGER(iwp) :: nny !< 128 INTEGER(iwp) :: nx !< 129 INTEGER(iwp) :: nxl !< 130 INTEGER(iwp) :: nxr !< 131 INTEGER(iwp) :: ny !< 132 INTEGER(iwp) :: nyn !< 133 INTEGER(iwp) :: nys !< 134 135 136 137 138 END TYPE local_boundaries 116 !-- Type to store information about the domain decomposition grid 117 TYPE, PUBLIC :: domain_decomposition_grid_features !< 118 119 INTEGER(iwp) :: comm2d !< 120 INTEGER(iwp) :: myid !< 121 INTEGER(iwp) :: nnx !< 122 INTEGER(iwp) :: nny !< 123 INTEGER(iwp) :: nx !< 124 INTEGER(iwp) :: nxl !< 125 INTEGER(iwp) :: nxr !< 126 INTEGER(iwp) :: ny !< 127 INTEGER(iwp) :: nyn !< 128 INTEGER(iwp) :: nys !< 129 INTEGER(iwp) :: numprocs !< 130 131 CONTAINS 132 133 PROCEDURE, PASS(this), PUBLIC :: activate_grid_from_this_class 134 PROCEDURE, PASS(this), PUBLIC :: save_grid_into_this_class 135 136 END TYPE domain_decomposition_grid_features 139 137 140 138 ! … … 145 143 INTEGER(iwp) :: nr_io_pe_per_node = 2 !< typical configuration, 2 sockets per node 146 144 LOGICAL :: no_shared_Memory_in_this_run !< 145 146 INTEGER(iwp) :: comm_model !< communicator of this model run 147 147 ! 148 148 !-- Variables for the shared memory communicator 149 INTEGER(iwp), PUBLIC :: comm_shared !< Communicator for processes with shared array149 INTEGER(iwp), PUBLIC :: comm_shared !< communicator for processes with shared array 150 150 INTEGER(iwp), PUBLIC :: sh_npes !< 151 151 INTEGER(iwp), PUBLIC :: sh_rank !< … … 157 157 INTEGER(iwp), PUBLIC :: io_npes !< 158 158 INTEGER(iwp), PUBLIC :: io_rank !< 159 160 TYPE( local_boundaries ), PUBLIC :: io_grid161 162 159 ! 163 160 !-- Variables for the node local communicator … … 167 164 INTEGER(iwp) :: n_rank !< 168 165 169 CONTAINS 170 171 PRIVATE 172 173 PROCEDURE, PASS(this), PUBLIC :: is_sm_active !< 174 PROCEDURE, PASS(this), PUBLIC :: sm_adjust_outer_boundary !< 175 PROCEDURE, PASS(this), PUBLIC :: sm_free_shared !< 176 PROCEDURE, PASS(this), PUBLIC :: sm_init_comm !< 177 PROCEDURE, PASS(this), PUBLIC :: sm_node_barrier !< 166 TYPE(domain_decomposition_grid_features), PUBLIC :: io_grid !< io grid features, depending on reading from prerun or restart run 167 168 169 CONTAINS 170 171 PRIVATE 172 173 PROCEDURE, PASS(this), PUBLIC :: is_sm_active 174 PROCEDURE, PASS(this), PUBLIC :: sm_adjust_outer_boundary 175 PROCEDURE, PASS(this), PUBLIC :: sm_free_shared 176 PROCEDURE, PASS(this), PUBLIC :: sm_init_comm 177 PROCEDURE, PASS(this), PUBLIC :: sm_init_part 178 PROCEDURE, PASS(this), PUBLIC :: sm_node_barrier 178 179 #if defined( __parallel ) 179 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d !< 180 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d !< 181 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2di !< 182 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d !< 183 184 GENERIC, PUBLIC :: sm_allocate_shared => sm_allocate_shared_1d, sm_allocate_shared_2d, & 185 sm_allocate_shared_2di, sm_allocate_shared_3d !< 180 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_64 181 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1d_32 182 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_1di 183 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_64 184 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2d_32 185 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_2di 186 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_64 187 PROCEDURE, PASS(this), PUBLIC :: sm_allocate_shared_3d_32 188 189 GENERIC, PUBLIC :: sm_allocate_shared => & 190 sm_allocate_shared_1d_64, sm_allocate_shared_1d_32, & 191 sm_allocate_shared_2d_64, sm_allocate_shared_2d_32, & 192 sm_allocate_shared_2di, sm_allocate_shared_3d_64, & 193 sm_allocate_shared_3d_32, sm_allocate_shared_1di 186 194 #endif 187 195 END TYPE sm_class … … 197 205 !> Setup the grid for shared memory IO. 198 206 !--------------------------------------------------------------------------------------------------! 199 SUBROUTINE sm_init_comm( this, sm_active ) 200 201 IMPLICIT NONE 202 203 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 207 SUBROUTINE sm_init_comm( this, sm_active, comm_input ) 208 209 IMPLICIT NONE 210 211 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 212 INTEGER, INTENT(IN), OPTIONAL :: comm_input !< main model communicator (comm2d) can optional be set 204 213 205 214 #if defined( __parallel ) 206 INTEGER :: color !<207 INTEGER :: max_n_npes !< Maximum number of PEs/node215 INTEGER :: color 216 INTEGER :: max_n_npes !< maximum number of PEs/node 208 217 #endif 209 218 210 LOGICAL, INTENT(IN) :: sm_active !< Flag to activate shared-memory IO 211 219 LOGICAL, INTENT(IN) :: sm_active !< flag to activate shared-memory IO 220 221 IF ( PRESENT( comm_input ) ) THEN 222 this%comm_model = comm_input 223 ELSE 224 this%comm_model = comm2d 225 ENDIF 212 226 213 227 this%no_shared_memory_in_this_run = .NOT. sm_active 228 this%comm_io = this%comm_model ! preset in case of non shared-memory-IO 214 229 215 230 IF ( this%no_shared_memory_in_this_run ) THEN … … 222 237 !-- Determine, how many MPI threads are running on a node 223 238 this%iam_io_pe = .FALSE. 224 CALL MPI_COMM_SPLIT_TYPE( comm2d, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, this%comm_node, ierr ) 239 CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, & 240 this%comm_node, ierr ) 225 241 CALL MPI_COMM_SIZE( this%comm_node, this%n_npes, ierr ) 226 242 CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr ) 227 243 228 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, comm2d, ierr )244 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr ) 229 245 ! 230 246 !-- Decide, if the configuration can run with shared-memory IO … … 267 283 !-- All threads with shared memory rank 0 will be I/O threads. 268 284 color = this%sh_rank 269 CALL MPI_COMM_SPLIT( comm2d, color, 0, this%comm_io, ierr )285 CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr ) 270 286 271 287 IF ( this%comm_io /= MPI_COMM_NULL ) THEN … … 287 303 #endif 288 304 305 ! write(9,'(a,8i7)') ' end of sm_init_comm ',this%sh_rank,this%sh_npes,this%io_rank,this%io_npes,this%io_pe_global_rank 306 ! write(9,*) 'This process is IO Process ',this%iam_io_pe 307 289 308 #if defined( __parallel ) 290 309 CONTAINS … … 305 324 INTEGER(iwp), DIMENSION(4,0:this%n_npes-1) :: local_dim_r !< 306 325 307 TYPE( local_boundaries), DIMENSION(32) :: node_grid !<326 TYPE(domain_decomposition_grid_features), DIMENSION(32) :: node_grid !< 308 327 309 328 ! … … 381 400 382 401 402 ! 403 !-- TODO: short description required, about the meaning of the following routine 404 !-- part must be renamed particles! 405 SUBROUTINE sm_init_part( this ) 406 407 IMPLICIT NONE 408 409 CLASS(sm_class), INTENT(INOUT) :: this !< pointer to access internal variables of this call 410 411 #if defined( __parallel ) 412 INTEGER(iwp) :: color !< 413 INTEGER(iwp) :: comm_shared_base !< 414 INTEGER(iwp) :: ierr !< 415 INTEGER(iwp) :: max_n_npes !< maximum number of PEs/node 416 417 LOGICAL :: sm_active !< 418 #endif 419 420 421 sm_active = .TRUE. ! particle IO always uses shared memory 422 this%comm_model = comm2d 423 424 this%no_shared_memory_in_this_run = .NOT. sm_active 425 this%comm_io = this%comm_model ! preset in case of non shared-memory-IO 426 427 IF ( this%no_shared_memory_in_this_run ) THEN 428 this%iam_io_pe = .TRUE. 429 RETURN 430 ENDIF 431 432 #if defined( __parallel ) 433 ! 434 !-- Determine, how many MPI threads are running on a node 435 this%iam_io_pe = .FALSE. 436 CALL MPI_COMM_SPLIT_TYPE( this%comm_model, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, & 437 this%comm_node, ierr ) 438 CALL MPI_COMM_SIZE( this%comm_node, this%n_npes, ierr ) 439 CALL MPI_COMM_RANK( this%comm_node, this%n_rank, ierr ) 440 441 CALL MPI_ALLREDUCE( this%n_npes, max_n_npes, 1, MPI_INTEGER, MPI_MAX, this%comm_model, ierr ) 442 443 ! 444 !-- TODO: better explanation 445 !-- It has to be testet, if using memory blocks for an IO process (MPI shared Memory), or if it is 446 !-- even better to use the complete node for MPI shared memory (this%nr_io_pe_per_node = 1). 447 !- In the latter case, the access to the MPI shared memory buffer is slower, the number of 448 !-- particles to move between threads will be much smaller. 449 IF ( max_n_npes > 64 ) THEN 450 ! 451 !-- Special configuration on the HLRN-IV system with 4 shared memory blocks/node 452 this%nr_io_pe_per_node = 4 453 ENDIF 454 455 IF ( this%nr_io_pe_per_node == 1 ) THEN 456 ! 457 !-- This branch is not realized so far 458 this%iam_io_pe = ( this%n_rank == 0 ) 459 this%comm_shared = this%comm_node 460 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 461 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 462 463 ELSEIF( this%nr_io_pe_per_node == 2 ) THEN 464 465 this%iam_io_pe = ( this%n_rank == 0 .OR. this%n_rank == this%n_npes/2 ) 466 IF ( this%n_rank < this%n_npes/2 ) THEN 467 color = 1 468 ELSE 469 color = 2 470 ENDIF 471 CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr ) 472 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 473 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 474 475 ELSEIF( this%nr_io_pe_per_node == 4 ) THEN 476 477 this%iam_io_pe = ( this%n_rank == 0 .OR. this%n_rank == this%n_npes/4 .OR. & 478 this%n_rank == this%n_npes/2 .OR. this%n_rank == (3*this%n_npes)/4 ) 479 IF ( this%n_rank < this%n_npes/4 ) THEN 480 color = 1 481 ELSEIF( this%n_rank < this%n_npes/2 ) THEN 482 color = 2 483 ELSEIF( this%n_rank < (3*this%n_npes)/4 ) THEN 484 color = 3 485 ELSE 486 color = 4 487 ENDIF 488 CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr ) 489 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 490 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 491 492 ELSE 493 494 WRITE( *, * ) 'shared_memory_io_mod: internal error' 495 WRITE( *, * ) 'only 2 or 4 shared memory groups per node are allowed' 496 STOP 497 498 ENDIF 499 500 ! 501 !-- Setup the shared memory area 502 CALL MPI_COMM_SPLIT( this%comm_node, color, 0, this%comm_shared, ierr ) 503 CALL MPI_COMM_SIZE( this%comm_shared, this%sh_npes, ierr ) 504 CALL MPI_COMM_RANK( this%comm_shared, this%sh_rank, ierr ) 505 506 ! 507 !-- Setup the communicator across the nodes depending on the shared memory rank. 508 !-- All threads with shared memory rank 0 will be I/O threads. 509 color = this%sh_rank 510 CALL MPI_COMM_SPLIT( this%comm_model, color, 0, this%comm_io, ierr ) 511 512 IF ( this%comm_io /= MPI_COMM_NULL ) THEN 513 CALL MPI_COMM_SIZE( this%comm_io, this%io_npes, ierr ) 514 CALL MPI_COMM_RANK( this%comm_io, this%io_rank, ierr ) 515 ELSE 516 this%io_npes = -1 517 this%io_rank = -1 518 ENDIF 519 520 IF ( this%sh_rank == 0 ) THEN 521 this%iam_io_pe = .TRUE. 522 this%io_pe_global_rank = myid 523 ENDIF 524 CALL MPI_BCAST( this%io_pe_global_rank, 1, MPI_INTEGER, 0, this%comm_shared, ierr ) 525 526 #else 527 this%iam_io_pe = .FALSE. 528 #endif 529 530 ! write(9,'(a,8i7)') 'sm_init_comm_part ',this%sh_rank,this%sh_npes,this%io_rank,this%io_npes 531 532 END SUBROUTINE sm_init_part 383 533 384 534 !--------------------------------------------------------------------------------------------------! … … 401 551 402 552 #if defined( __parallel ) 403 !--------------------------------------------------------------------------------------------------! 404 ! Description: 405 ! ------------ 406 !> Allocate shared 1d-REAL array on ALL threads 407 !--------------------------------------------------------------------------------------------------! 408 SUBROUTINE sm_allocate_shared_1d( this, p1, d1, d2, win ) 409 410 IMPLICIT NONE 411 412 CLASS(sm_class), INTENT(inout) :: this !< 413 !< 414 INTEGER(iwp) :: disp_unit !< 415 INTEGER(iwp), INTENT(IN) :: d1 !< 416 INTEGER(iwp), INTENT(IN) :: d2 !< 417 INTEGER(iwp), SAVE :: pe_from = 0 !< 418 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 419 INTEGER(iwp), INTENT(OUT) :: win !< 420 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 421 422 INTEGER, DIMENSION(1) :: buf_shape !< 423 424 REAL(wp), DIMENSION(:), POINTER :: buf !< 425 REAL(wp), DIMENSION(:), POINTER :: p1 !< 426 427 TYPE(C_PTR), SAVE :: base_ptr !< 428 TYPE(C_PTR), SAVE :: rem_ptr !< 553 554 !--------------------------------------------------------------------------------------------------! 555 ! Description: 556 ! ------------ 557 !> Allocate shared 1d-REAL (64 Bit) array on ALL threads 558 !--------------------------------------------------------------------------------------------------! 559 SUBROUTINE sm_allocate_shared_1d_64( this, p1, d1, d2, win ) 560 561 IMPLICIT NONE 562 563 CLASS(sm_class), INTENT(inout) :: this 564 565 INTEGER(iwp) :: disp_unit 566 INTEGER(iwp), INTENT(IN) :: d1 567 INTEGER(iwp), INTENT(IN) :: d2 568 INTEGER(iwp), SAVE :: pe_from = 0 569 INTEGER(iwp), INTENT(OUT) :: win 570 571 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 572 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 573 574 INTEGER, DIMENSION(1) :: buf_shape 575 576 REAL(dp), DIMENSION(:), POINTER :: buf 577 REAL(dp), DIMENSION(:), POINTER :: p1 578 579 TYPE(C_PTR), SAVE :: base_ptr 580 TYPE(C_PTR), SAVE :: rem_ptr 429 581 430 582 … … 437 589 wsize = 1 438 590 ENDIF 439 wsize = wsize * 8 ! Please note, size is always in bytes, independently of the displacement440 ! unit441 442 CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr )591 wsize = wsize * dp ! please note, size is always in bytes, independently of the displacement 592 ! unit 593 594 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr ) 443 595 ! 444 596 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) … … 453 605 pe_from = MOD( pe_from, this%sh_npes ) 454 606 455 END SUBROUTINE sm_allocate_shared_1d 456 457 458 !--------------------------------------------------------------------------------------------------! 459 ! Description: 460 ! ------------ 461 !> Allocate shared 2d-REAL array on ALL threads 462 !--------------------------------------------------------------------------------------------------! 463 SUBROUTINE sm_allocate_shared_2d( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) 464 465 IMPLICIT NONE 466 467 CLASS(sm_class), INTENT(INOUT) :: this !< 468 469 INTEGER(iwp) :: disp_unit !< 470 INTEGER(iwp), INTENT(IN) :: n_nxlg !< 471 INTEGER(iwp), INTENT(IN) :: n_nxrg !< 472 INTEGER(iwp), INTENT(IN) :: n_nyng !< 473 INTEGER(iwp), INTENT(IN) :: n_nysg !< 474 INTEGER(iwp), SAVE :: pe_from = 0 !< 475 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 476 INTEGER(iwp), INTENT(OUT) :: win !< 477 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 478 479 INTEGER(iwp), DIMENSION(2) :: buf_shape !< 480 481 REAL(wp), DIMENSION(:,:), POINTER :: buf !< 482 REAL(wp), DIMENSION(:,:), POINTER :: p2 !< 483 484 TYPE(C_PTR),SAVE :: base_ptr !< 485 TYPE(C_PTR),SAVE :: rem_ptr !< 607 END SUBROUTINE sm_allocate_shared_1d_64 608 609 610 !--------------------------------------------------------------------------------------------------! 611 ! Description: 612 ! ------------ 613 !> Allocate shared 1d-REAL (32 Bit) array on ALL threads 614 !--------------------------------------------------------------------------------------------------! 615 SUBROUTINE sm_allocate_shared_1d_32( this, p1, d1, d2, win ) 616 617 IMPLICIT NONE 618 619 CLASS(sm_class), INTENT(inout) :: this 620 621 INTEGER(iwp) :: disp_unit 622 INTEGER(iwp), INTENT(IN) :: d1 623 INTEGER(iwp), INTENT(IN) :: d2 624 INTEGER(iwp), SAVE :: pe_from = 0 625 INTEGER(iwp), INTENT(OUT) :: win 626 627 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 628 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 629 630 INTEGER, DIMENSION(1) :: buf_shape 631 632 REAL(sp), DIMENSION(:), POINTER :: buf 633 REAL(sp), DIMENSION(:), POINTER :: p1 634 635 TYPE(C_PTR), SAVE :: base_ptr 636 TYPE(C_PTR), SAVE :: rem_ptr 637 638 639 IF ( this%no_shared_memory_in_this_run ) RETURN 640 ! 641 !-- Allocate shared memory on node rank 0 threads. 642 IF ( this%sh_rank == pe_from ) THEN 643 wsize = d2 - d1 + 1 644 ELSE 645 wsize = 1 646 ENDIF 647 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 648 ! unit 649 650 CALL MPI_WIN_ALLOCATE_SHARED( wsize, sp, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr ) 651 ! 652 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 653 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 654 ! 655 !-- Convert C- to Fortran-pointer 656 buf_shape(1) = d2 - d1 + 1 657 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 658 p1(d1:) => buf 659 ! 660 !-- Allocate shared memory in round robin on all PEs of a node. 661 pe_from = MOD( pe_from, this%sh_npes ) 662 663 END SUBROUTINE sm_allocate_shared_1d_32 664 665 666 !--------------------------------------------------------------------------------------------------! 667 ! Description: 668 ! ------------ 669 !> Allocate shared 1d-INTEGER array on ALL threads 670 !--------------------------------------------------------------------------------------------------! 671 SUBROUTINE sm_allocate_shared_1di( this, p1, d1, d2, win ) 672 673 IMPLICIT NONE 674 675 CLASS(sm_class), INTENT(inout) :: this 676 677 INTEGER(iwp) :: disp_unit 678 INTEGER(iwp), INTENT(IN) :: d1 679 INTEGER(iwp), INTENT(IN) :: d2 680 INTEGER(iwp), SAVE :: pe_from = 0 681 INTEGER(iwp), INTENT(OUT) :: win 682 683 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 684 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 685 686 INTEGER, DIMENSION(1) :: buf_shape 687 688 INTEGER(iwp), DIMENSION(:), POINTER :: buf 689 INTEGER(iwp), DIMENSION(:), POINTER :: p1 690 691 TYPE(C_PTR), SAVE :: base_ptr 692 TYPE(C_PTR), SAVE :: rem_ptr 693 694 695 IF ( this%no_shared_memory_in_this_run ) RETURN 696 ! 697 !-- Allocate shared memory on node rank 0 threads. 698 IF ( this%sh_rank == pe_from ) THEN 699 wsize = d2 - d1 + 1 700 ELSE 701 wsize = 1 702 ENDIF 703 wsize = wsize * iwp ! Please note, size is always in bytes, independently of the displacement 704 ! unit 705 706 CALL MPI_WIN_ALLOCATE_SHARED( wsize, iwp, MPI_INFO_NULL, this%comm_shared,base_ptr, win, ierr ) 707 ! 708 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 709 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 710 ! 711 !-- Convert C- to Fortran-pointer 712 buf_shape(1) = d2 - d1 + 1 713 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 714 p1(d1:) => buf 715 ! 716 !-- Allocate shared memory in round robin on all PEs of a node. 717 pe_from = MOD( pe_from, this%sh_npes ) 718 719 END SUBROUTINE sm_allocate_shared_1di 720 721 722 !--------------------------------------------------------------------------------------------------! 723 ! Description: 724 ! ------------ 725 !> Allocate shared 2d-REAL array on ALL threads (64 Bit) 726 !--------------------------------------------------------------------------------------------------! 727 SUBROUTINE sm_allocate_shared_2d_64( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) 728 729 IMPLICIT NONE 730 731 CLASS(sm_class), INTENT(INOUT) :: this 732 733 INTEGER(iwp) :: disp_unit 734 INTEGER(iwp), INTENT(IN) :: n_nxlg 735 INTEGER(iwp), INTENT(IN) :: n_nxrg 736 INTEGER(iwp), INTENT(IN) :: n_nyng 737 INTEGER(iwp), INTENT(IN) :: n_nysg 738 INTEGER(iwp), SAVE :: pe_from = 0 739 INTEGER(iwp), INTENT(OUT) :: win 740 741 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 742 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 743 744 INTEGER(iwp), DIMENSION(2) :: buf_shape 745 746 REAL(dp), DIMENSION(:,:), POINTER :: buf 747 REAL(dp), DIMENSION(:,:), POINTER :: p2 748 749 TYPE(C_PTR), SAVE :: base_ptr 750 TYPE(C_PTR), SAVE :: rem_ptr 486 751 487 752 … … 495 760 ENDIF 496 761 497 wsize = wsize * 8! Please note, size is always in bytes, independently of the displacement498 ! unit762 wsize = wsize * dp ! Please note, size is always in bytes, independently of the displacement 763 ! unit 499 764 500 765 CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) … … 512 777 pe_from = MOD( pe_from, this%sh_npes ) 513 778 514 END SUBROUTINE sm_allocate_shared_2d 779 END SUBROUTINE sm_allocate_shared_2d_64 780 781 782 !--------------------------------------------------------------------------------------------------! 783 ! Description: 784 ! ------------ 785 !> Allocate shared 2d-REAL (32 Bit) array on ALL threads 786 !--------------------------------------------------------------------------------------------------! 787 SUBROUTINE sm_allocate_shared_2d_32( this, p2, n_nxlg, n_nxrg, n_nysg, n_nyng, win ) 788 789 IMPLICIT NONE 790 791 CLASS(sm_class), INTENT(INOUT) :: this 792 793 INTEGER(iwp) :: disp_unit 794 INTEGER(iwp), INTENT(IN) :: n_nxlg 795 INTEGER(iwp), INTENT(IN) :: n_nxrg 796 INTEGER(iwp), INTENT(IN) :: n_nyng 797 INTEGER(iwp), INTENT(IN) :: n_nysg 798 INTEGER(iwp), SAVE :: pe_from = 0 799 INTEGER(iwp), INTENT(OUT) :: win 800 801 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 802 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 803 804 INTEGER(iwp), DIMENSION(2) :: buf_shape 805 806 REAL(sp), DIMENSION(:,:), POINTER :: buf 807 REAL(sp), DIMENSION(:,:), POINTER :: p2 808 809 TYPE(C_PTR), SAVE :: base_ptr 810 TYPE(C_PTR), SAVE :: rem_ptr 811 812 813 IF ( this%no_shared_memory_in_this_run ) RETURN 814 ! 815 !-- Allocate shared memory on node rank 0 threads. 816 IF ( this%sh_rank == pe_from ) THEN 817 wsize = ( n_nyng - n_nysg + 1 ) * ( n_nxrg - n_nxlg + 1 ) 818 ELSE 819 wsize = 1 820 ENDIF 821 822 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 823 ! unit 824 825 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 826 ! 827 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 828 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 829 ! 830 !-- Convert C- to Fortran-pointer 831 buf_shape(2) = n_nyng - n_nysg + 1 832 buf_shape(1) = n_nxrg - n_nxlg + 1 833 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 834 p2(n_nxlg:, n_nysg:) => buf 835 ! 836 !-- Allocate shared memory in round robin on all PEs of a node. 837 pe_from = MOD( pe_from, this%sh_npes ) 838 839 END SUBROUTINE sm_allocate_shared_2d_32 515 840 516 841 … … 532 857 INTEGER(iwp), INTENT(IN) :: n_nysg !< 533 858 INTEGER(iwp), SAVE :: pe_from = 0 !< 859 INTEGER(iwp), INTENT(OUT) :: win !< 860 534 861 INTEGER(kind=MPI_ADDRESS_KIND) :: rem_size !< 535 INTEGER(iwp), INTENT(OUT) :: win !<536 862 INTEGER(kind=MPI_ADDRESS_KIND) :: wsize !< 537 863 … … 541 867 INTEGER(iwp), DIMENSION(:,:), POINTER :: p2i !< 542 868 543 TYPE(C_PTR), SAVE:: base_ptr !<544 TYPE(C_PTR), SAVE:: rem_ptr !<869 TYPE(C_PTR), SAVE :: base_ptr !< 870 TYPE(C_PTR), SAVE :: rem_ptr !< 545 871 546 872 … … 577 903 ! Description: 578 904 ! ------------ 579 !> Allocate shared 3d-REAL array on ALL threads580 !--------------------------------------------------------------------------------------------------! 581 SUBROUTINE sm_allocate_shared_3d ( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win )905 !> Allocate shared 3d-REAL (64 Bit) array on ALL threads 906 !--------------------------------------------------------------------------------------------------! 907 SUBROUTINE sm_allocate_shared_3d_64( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) 582 908 583 909 IMPLICIT NONE … … 593 919 INTEGER, INTENT(IN) :: d3s !< 594 920 INTEGER, SAVE :: pe_from = 0 !< 921 INTEGER, INTENT(OUT) :: win !< 922 595 923 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size !< 596 INTEGER, INTENT(OUT) :: win !<597 924 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize !< 598 925 599 926 INTEGER, DIMENSION(3) :: buf_shape !< 600 927 601 REAL( wp), DIMENSION(:,:,:), POINTER :: buf !<602 REAL( wp), DIMENSION(:,:,:), POINTER :: p3 !<928 REAL(dp), DIMENSION(:,:,:), POINTER :: buf !< 929 REAL(dp), DIMENSION(:,:,:), POINTER :: p3 !< 603 930 604 931 TYPE(C_PTR), SAVE :: base_ptr !< … … 615 942 ENDIF 616 943 617 wsize = wsize * 8! Please note, size is always in bytes, independently of the displacement944 wsize = wsize * dp ! Please note, size is always in bytes, independently of the displacement 618 945 ! unit 619 946 620 CALL MPI_WIN_ALLOCATE_SHARED( wsize, 8, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr )947 CALL MPI_WIN_ALLOCATE_SHARED( wsize, dp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 621 948 ! 622 949 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) … … 633 960 pe_from = MOD( pe_from, this%sh_npes ) 634 961 635 END SUBROUTINE sm_allocate_shared_3d 962 END SUBROUTINE sm_allocate_shared_3d_64 963 964 965 !--------------------------------------------------------------------------------------------------! 966 ! Description: 967 ! ------------ 968 !> Allocate shared 3d-REAL (32 Bit) array on ALL threads 969 !--------------------------------------------------------------------------------------------------! 970 SUBROUTINE sm_allocate_shared_3d_32( this, p3, d1s, d1e, d2s, d2e, d3s, d3e, win ) 971 972 IMPLICIT NONE 973 974 CLASS(sm_class), INTENT(inout) :: this 975 976 INTEGER :: disp_unit 977 INTEGER, INTENT(IN) :: d1e 978 INTEGER, INTENT(IN) :: d1s 979 INTEGER, INTENT(IN) :: d2e 980 INTEGER, INTENT(IN) :: d2s 981 INTEGER, INTENT(IN) :: d3e 982 INTEGER, INTENT(IN) :: d3s 983 INTEGER, SAVE :: pe_from = 0 984 INTEGER, INTENT(OUT) :: win 985 986 INTEGER(KIND=MPI_ADDRESS_KIND) :: rem_size 987 INTEGER(KIND=MPI_ADDRESS_KIND) :: wsize 988 989 INTEGER, DIMENSION(3) :: buf_shape 990 991 REAL(sp), DIMENSION(:,:,:), POINTER :: buf 992 REAL(sp), DIMENSION(:,:,:), POINTER :: p3 993 994 TYPE(C_PTR), SAVE :: base_ptr 995 TYPE(C_PTR), SAVE :: rem_ptr 996 997 998 IF ( this%no_shared_memory_in_this_run ) RETURN 999 ! 1000 !-- Allocate shared memory on node rank 0 threads. 1001 IF ( this%sh_rank == pe_from ) THEN 1002 wsize = ( d3e - d3s + 1 ) * ( d2e - d2s + 1 ) * ( d1e - d1s + 1 ) 1003 ELSE 1004 wsize = 1 1005 ENDIF 1006 1007 wsize = wsize * sp ! Please note, size is always in bytes, independently of the displacement 1008 ! unit 1009 1010 CALL MPI_WIN_ALLOCATE_SHARED( wsize, sp, MPI_INFO_NULL, this%comm_shared, base_ptr, win, ierr ) 1011 ! 1012 !-- Get C-pointer of the memory located on node-rank pe_from (sh_rank == pe_from) 1013 CALL MPI_WIN_SHARED_QUERY( win, pe_from, rem_size, disp_unit, rem_ptr, ierr ) 1014 ! 1015 !-- Convert C- to Fortran-pointer 1016 buf_shape(3) = d3e - d3s + 1 1017 buf_shape(2) = d2e - d2s + 1 1018 buf_shape(1) = d1e - d1s + 1 1019 CALL C_F_POINTER( rem_ptr, buf, buf_shape ) 1020 p3(d1s:,d2s:,d3s:) => buf 1021 ! 1022 !-- Allocate shared memory in round robin on all PEs of a node. 1023 pe_from = MOD( pe_from, this%sh_npes ) 1024 1025 END SUBROUTINE sm_allocate_shared_3d_32 1026 636 1027 #endif 637 1028 … … 694 1085 INTEGER(iwp), INTENT(INOUT) :: win !< 695 1086 696 IF ( this%no_shared_memory_in_this_run .OR. win == -1234567890 ) RETURN 697 ! win is used just to avoid compile errors because of unused arguments 1087 IF ( this%no_shared_memory_in_this_run ) RETURN 698 1088 #if defined( __parallel ) 699 1089 CALL MPI_WIN_FREE( win, ierr ) 700 1090 #endif 1091 win = -1 701 1092 702 1093 END SUBROUTINE sm_free_shared … … 723 1114 END SUBROUTINE sm_node_barrier 724 1115 1116 1117 SUBROUTINE save_grid_into_this_class( this ) 1118 1119 IMPLICIT NONE 1120 1121 CLASS(domain_decomposition_grid_features), INTENT(inout) :: this !< 1122 1123 this%myid = myid !< 1124 this%nnx = nnx !< 1125 this%nny = nny !< 1126 this%nx = nx !< 1127 this%nxl = nxl !< 1128 this%nxr = nxr !< 1129 this%ny = ny !< 1130 this%nyn = nyn !< 1131 this%nys = nys !< 1132 this%numprocs = numprocs !< 1133 this%comm2d = comm2d !< 1134 1135 END SUBROUTINE save_grid_into_this_class 1136 1137 1138 SUBROUTINE activate_grid_from_this_class( this ) 1139 1140 IMPLICIT NONE 1141 1142 CLASS(domain_decomposition_grid_features), INTENT(inout) :: this !< 1143 1144 myid = this%myid !< 1145 nnx = this%nnx !< 1146 nny = this%nny !< 1147 nx = this%nx !< 1148 nxl = this%nxl !< 1149 nxr = this%nxr !< 1150 ny = this%ny !< 1151 nyn = this%nyn !< 1152 nys = this%nys !< 1153 numprocs = this%numprocs !< 1154 comm2d = this%comm2d !< 1155 1156 END SUBROUTINE activate_grid_from_this_class 1157 725 1158 END MODULE shared_memory_io_mod
Note: See TracChangeset
for help on using the changeset viewer.