Changeset 2599 for palm/trunk/SOURCE/pmc_parent_mod.f90
- Timestamp:
- Nov 1, 2017 1:18:45 PM (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/pmc_parent_mod.f90
r2101 r2599 26 26 ! ----------------- 27 27 ! $Id$ 28 ! Some cleanup and commenting improvements only. 29 ! 30 ! 2101 2017-01-05 16:42:31Z suehring 28 31 ! 29 32 ! 2000 2016-08-20 18:09:15Z knoop … … 192 195 193 196 ! 194 !-- Intra communicat er is used for MPI_GET197 !-- Intra communicator is used for MPI_GET 195 198 CALL MPI_INTERCOMM_MERGE( children(childid)%inter_comm, .FALSE., & 196 199 children(childid)%intra_comm, istat ) … … 232 235 233 236 IF ( m_model_rank == 0 ) THEN 234 235 ! 236 !-- Sort to ascending parent PE order 237 ! 238 !-- Sort to ascending parent process order 237 239 CALL pmc_sort( index_list, 6 ) 238 239 240 is = 1 240 241 DO ip = 0, m_model_npes-1 241 242 ! 243 !-- Split into parent PEs 242 ! 243 !-- Split into parent processes 244 244 ie = is - 1 245 246 ! 247 !-- There may be no entry for this PE 245 ! 246 !-- There may be no entry for this process 248 247 IF ( is <= SIZE( index_list,2 ) .AND. ie >= 0 ) THEN 249 250 248 DO WHILE ( index_list(6,ie+1 ) == ip ) 251 249 ie = ie + 1 252 250 IF ( ie == SIZE( index_list,2 ) ) EXIT 253 251 ENDDO 254 255 252 ian = ie - is + 1 256 257 253 ELSE 258 254 is = -1 … … 260 256 ian = 0 261 257 ENDIF 262 263 ! 264 !-- Send data to other parent PEs 258 ! 259 !-- Send data to other parent processes 265 260 IF ( ip == 0 ) THEN 266 261 indchildren(childid)%nrpoints = ian … … 279 274 ENDIF 280 275 is = ie + 1 281 282 276 ENDDO 283 284 277 ELSE 285 286 278 CALL MPI_RECV( indchildren(childid)%nrpoints, 1, MPI_INTEGER, 0, 1000, & 287 279 m_model_comm, MPI_STATUS_IGNORE, istat ) 288 280 ian = indchildren(childid)%nrpoints 289 290 281 IF ( ian > 0 ) THEN 291 282 ALLOCATE( indchildren(childid)%index_list_2d(6,ian) ) … … 294 285 MPI_STATUS_IGNORE, istat) 295 286 ENDIF 296 297 287 ENDIF 298 299 288 CALL set_pe_index_list( childid, children(childid), & 300 289 indchildren(childid)%index_list_2d, & … … 328 317 329 318 next_array_in_list = next_array_in_list + 1 330 331 ! 332 !-- Array names are the same on all children PEs, so take first PEto get the name319 ! 320 !-- Array names are the same on all children processes, so take first 321 !-- process to get the name 333 322 ape => children(childid)%pes(1) 334 323 335 324 IF ( next_array_in_list > ape%nr_arrays ) THEN 336 337 325 ! 338 326 !-- All arrays are done … … 343 331 ar => ape%array_list(next_array_in_list) 344 332 myname = ar%name 345 346 333 ! 347 334 !-- Return true if legal array … … 413 400 414 401 array_adr = C_LOC(array) 415 416 402 ! 417 403 !-- In PALM's pointer version, two indices have to be stored internally. … … 469 455 rcount = 0 470 456 bufsize = 8 471 472 457 ! 473 458 !-- First stride: compute size and set index … … 493 478 CALL MPI_ISEND( myindex, 1, MPI_INTEGER, i-1, tag, & 494 479 children(childid)%inter_comm, req(rcount), ierr ) 495 496 480 ! 497 481 !-- Maximum of 1024 outstanding requests 498 !-- TODO: what does this limit mean? 482 !-- TODO: what does this limit mean? Does outstanding mean pending? 499 483 IF ( rcount == 1024 ) THEN 500 484 CALL MPI_WAITALL( rcount, req, MPI_STATUSES_IGNORE, ierr ) … … 505 489 bufsize = bufsize + arlen 506 490 ar%sendsize = arlen 507 508 491 ENDDO 509 492 … … 513 496 514 497 ENDDO 515 516 498 ! 517 499 !-- Create RMA (One Sided Communication) window for data buffer parent to … … 530 512 children(childid)%intra_comm, & 531 513 children(childid)%win_parent_child, ierr ) 532 533 514 ! 534 515 !-- Open window to set data 535 516 CALL MPI_WIN_FENCE( 0, children(childid)%win_parent_child, ierr ) 536 537 517 ! 538 518 !-- Second stride: set buffer pointer … … 555 535 ENDDO 556 536 ENDDO 557 558 537 ! 559 538 !-- Child to parent direction 560 539 bufsize = 8 561 562 540 ! 563 541 !-- First stride: compute size and set index 564 542 DO i = 1, children(childid)%inter_npes 565 566 543 ape => children(childid)%pes(i) 567 544 tag = 300 568 569 545 DO j = 1, ape%nr_arrays 570 571 546 ar => ape%array_list(j) 572 573 547 ! 574 548 !-- Receive index from child … … 576 550 CALL MPI_RECV( myindex, 1, MPI_INTEGER, i-1, tag, & 577 551 children(childid)%inter_comm, MPI_STATUS_IGNORE, ierr ) 578 579 552 IF ( ar%nrdims == 3 ) THEN 580 553 bufsize = MAX( bufsize, ape%nrele * ar%a_dim(4) ) … … 583 556 ENDIF 584 557 ar%recvindex = myindex 585 586 558 ENDDO 587 588 ENDDO 589 559 ENDDO 590 560 ! 591 561 !-- Create RMA (one sided communication) data buffer. … … 596 566 597 567 CALL MPI_BARRIER( children(childid)%intra_comm, ierr ) 598 599 568 ! 600 569 !-- Second stride: set buffer pointer 601 570 DO i = 1, children(childid)%inter_npes 602 603 571 ape => children(childid)%pes(i) 604 605 572 DO j = 1, ape%nr_arrays 606 573 ar => ape%array_list(j) 607 574 ar%recvbuf = base_ptr 608 575 ENDDO 609 610 576 ENDDO 611 577 … … 654 620 655 621 DO ip = 1, children(childid)%inter_npes 656 657 622 ape => children(childid)%pes(ip) 658 659 623 DO j = 1, ape%nr_arrays 660 661 624 ar => ape%array_list(j) 662 625 myindex = 1 663 664 626 IF ( ar%nrdims == 2 ) THEN 665 666 627 buf_shape(1) = ape%nrele 667 628 CALL C_F_POINTER( ar%sendbuf, buf, buf_shape ) … … 671 632 myindex = myindex + 1 672 633 ENDDO 673 674 634 ELSEIF ( ar%nrdims == 3 ) THEN 675 676 635 buf_shape(1) = ape%nrele*ar%a_dim(4) 677 636 CALL C_F_POINTER( ar%sendbuf, buf, buf_shape ) … … 682 641 myindex = myindex + ar%a_dim(4) 683 642 ENDDO 684 685 643 ENDIF 686 687 644 ENDDO 688 689 ENDDO 690 645 ENDDO 691 646 ! 692 647 !-- Buffer is filled … … 727 682 728 683 t1 = pmc_time() 729 730 684 ! 731 685 !-- Wait for child to fill buffer … … 733 687 t2 = pmc_time() - t1 734 688 IF ( PRESENT( waittime ) ) waittime = t2 735 736 689 ! 737 690 !-- TODO: check next statement … … 741 694 742 695 DO ip = 1, children(childid)%inter_npes 743 744 696 ape => children(childid)%pes(ip) 745 746 697 DO j = 1, ape%nr_arrays 747 748 698 ar => ape%array_list(j) 749 699 750 700 IF ( ar%recvindex < 0 ) CYCLE 751 701 … … 755 705 nr = ape%nrele * ar%a_dim(4) 756 706 ENDIF 757 758 707 buf_shape(1) = nr 759 708 CALL C_F_POINTER( ar%recvbuf, buf, buf_shape ) 760 761 709 ! 762 710 !-- MPI passive target RMA 763 711 IF ( nr > 0 ) THEN 764 712 target_disp = ar%recvindex - 1 765 766 ! 767 !-- Child PEs are located behind parent PEs 713 ! 714 !-- Child processes are located behind parent process 768 715 target_pe = ip - 1 + m_model_npes 769 716 CALL MPI_WIN_LOCK( MPI_LOCK_SHARED, target_pe, 0, & … … 774 721 children(childid)%win_parent_child, ierr ) 775 722 ENDIF 776 777 723 myindex = 1 778 724 IF ( ar%nrdims == 2 ) THEN 779 780 725 CALL C_F_POINTER( ar%data, data_2d, ar%a_dim(1:2) ) 781 726 DO ij = 1, ape%nrele … … 783 728 myindex = myindex + 1 784 729 ENDDO 785 786 730 ELSEIF ( ar%nrdims == 3 ) THEN 787 788 731 CALL C_F_POINTER( ar%data, data_3d, ar%a_dim(1:3)) 789 732 DO ij = 1, ape%nrele … … 792 735 myindex = myindex + ar%a_dim(4) 793 736 ENDDO 794 795 737 ENDIF 796 797 738 ENDDO 798 799 739 ENDDO 800 740 … … 832 772 833 773 ! 834 !-- Set array for child inter PE0774 !-- Set array for child inter process 0 835 775 IMPLICIT NONE 836 776 … … 849 789 850 790 DO i = 1, children(childid)%inter_npes 851 852 791 ape => children(childid)%pes(i) 853 792 ar => ape%array_list(next_array_in_list) … … 855 794 ar%a_dim = dims 856 795 ar%data = array_adr 857 858 796 IF ( PRESENT( second_adr ) ) THEN 859 797 ar%po_data(1) = array_adr … … 863 801 ar%po_data(2) = C_NULL_PTR 864 802 ENDIF 865 866 803 ENDDO 867 804 … … 885 822 886 823 DO ip = 1, children(childid)%inter_npes 887 888 824 ape => children(childid)%pes(ip) 889 890 825 DO j = 1, ape%nr_arrays 891 892 826 ar => ape%array_list(j) 893 827 IF ( iactive == 1 .OR. iactive == 2 ) THEN 894 828 ar%data = ar%po_data(iactive) 895 829 ENDIF 896 897 830 ENDDO 898 899 831 ENDDO 900 832 … … 931 863 932 864 ! 933 !-- First, count entries for every remote child PE865 !-- First, count entries for every remote child process 934 866 DO i = 1, mychild%inter_npes 935 867 ape => mychild%pes(i) 936 868 ape%nrele = 0 937 869 ENDDO 938 939 870 ! 940 871 !-- Loop over number of coarse grid cells 941 872 DO j = 1, nrp 942 rempe = index_list(5,j) + 1 ! PE number on remote PE873 rempe = index_list(5,j) + 1 ! process number on remote process 943 874 ape => mychild%pes(rempe) 944 ape%nrele = ape%nrele + 1 ! Increment number of elements for this child PE875 ape%nrele = ape%nrele + 1 ! Increment number of elements for this child process 945 876 ENDDO 946 877 … … 951 882 952 883 remind = 0 953 954 884 ! 955 885 !-- Second, create lists … … 963 893 ape%locind(ind)%j = index_list(2,j) 964 894 ENDDO 965 966 ! 967 !-- Prepare number of elements for children PEs 895 ! 896 !-- Prepare number of elements for children processes 968 897 CALL pmc_alloc_mem( rldef, mychild%inter_npes*2 ) 969 970 ! 971 !-- Number of child PEs * size of INTEGER (i just arbitrary INTEGER) 898 ! 899 !-- Number of child processes * size of INTEGER (i just arbitrary INTEGER) 972 900 winsize = mychild%inter_npes*c_sizeof(i)*2 973 901 974 902 CALL MPI_WIN_CREATE( rldef, winsize, iwp, MPI_INFO_NULL, & 975 903 mychild%intra_comm, indwin, ierr ) 976 977 904 ! 978 905 !-- Open window to set data 979 906 CALL MPI_WIN_FENCE( 0, indwin, ierr ) 980 907 981 rldef(1) = 0 ! index on remote PE 0 982 rldef(2) = remind(1) ! number of elements on remote PE 0 983 908 rldef(1) = 0 ! index on remote process 0 909 rldef(2) = remind(1) ! number of elements on remote process 0 984 910 ! 985 911 !-- Reserve buffer for index array 986 912 DO i = 2, mychild%inter_npes 987 913 i2 = (i-1) * 2 + 1 988 rldef(i2) = rldef(i2-2) + rldef(i2-1) * 2 ! index on remote PE 989 rldef(i2+1) = remind(i) ! number of elements on remote PE 990 ENDDO 991 914 rldef(i2) = rldef(i2-2) + rldef(i2-1) * 2 ! index on remote process 915 rldef(i2+1) = remind(i) ! number of elements on remote process 916 ENDDO 992 917 ! 993 918 !-- Close window to allow child to access data 994 919 CALL MPI_WIN_FENCE( 0, indwin, ierr ) 995 996 920 ! 997 921 !-- Child has retrieved data … … 1000 924 i2 = 2 * mychild%inter_npes - 1 1001 925 winsize = ( rldef(i2) + rldef(i2+1) ) * 2 1002 1003 926 ! 1004 927 !-- Make sure, MPI_ALLOC_MEM works … … 1013 936 !-- Open window to set data 1014 937 CALL MPI_WIN_FENCE( 0, indwin2, ierr ) 1015 1016 938 ! 1017 939 !-- Create the 2D index list 1018 940 DO j = 1, nrp 1019 rempe = index_list(5,j) + 1 ! PE number on remote PE941 rempe = index_list(5,j) + 1 ! process number on remote process 1020 942 ape => mychild%pes(rempe) 1021 943 i2 = rempe * 2 - 1 … … 1025 947 rldef(i2) = rldef(i2)+2 1026 948 ENDDO 1027 1028 949 ! 1029 950 !-- All data are set 1030 951 CALL MPI_WIN_FENCE( 0, indwin2, ierr ) 1031 1032 952 ! 1033 953 !-- Don't know why, but this barrier is necessary before windows can be freed
Note: See TracChangeset
for help on using the changeset viewer.