MODULE pmc_client !--------------------------------------------------------------------------------! ! This file is part of PALM. ! ! PALM is free software: you can redistribute it and/or modify it under the terms ! of the GNU General Public License as published by the Free Software Foundation, ! either version 3 of the License, or (at your option) any later version. ! ! PALM is distributed in the hope that it will be useful, but WITHOUT ANY ! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR ! A PARTICULAR PURPOSE. See the GNU General Public License for more details. ! ! You should have received a copy of the GNU General Public License along with ! PALM. If not, see . ! ! Copyright 1997-2015 Leibniz Universitaet Hannover !--------------------------------------------------------------------------------! ! ! Current revisions: ! ------------------ ! ! ! Former revisions: ! ----------------- ! $Id: pmc_client.f90 1787 2016-03-08 06:57:00Z maronga $ ! ! 1786 2016-03-08 05:49:27Z raasch ! change in client-server data transfer: server now gets data from client ! instead that client put's it to the server ! ! 1783 2016-03-06 18:36:17Z raasch ! Bugfix: wrong data-type in MPI_WIN_CREATE replaced ! ! 1779 2016-03-03 08:01:28Z raasch ! kind=dp replaced by wp, dim_order removed ! array management changed from linked list to sequential loop ! ! 1764 2016-02-28 12:45:19Z raasch ! cpp-statement added (nesting can only be used in parallel mode), ! all kinds given in PALM style ! ! 1762 2016-02-25 12:31:13Z hellstea ! Initial revision by K. Ketelsen ! ! Description: ! ------------ ! ! Client part of Palm Model Coupler !------------------------------------------------------------------------------! #if defined( __parallel ) use, intrinsic :: iso_c_binding #if defined( __lc ) USE MPI #else INCLUDE "mpif.h" #endif USE kinds USE PMC_general, ONLY: ClientDef, DA_NameDef, DA_Namelen, PMC_STATUS_OK, PMC_DA_NAME_ERR, PeDef, ArrayDef, & DA_Desclen, DA_Namelen, PMC_G_SetName, PMC_MAX_ARRAY USE PMC_handle_communicator, ONLY: m_model_comm,m_model_rank,m_model_npes, m_to_server_comm USE PMC_MPI_wrapper, ONLY: PMC_Send_to_Server, PMC_Recv_from_Server, PMC_Time, & PMC_Bcast, PMC_Inter_Bcast, PMC_Alloc_mem IMPLICIT none PRIVATE SAVE Type(ClientDef) :: me INTEGER :: next_array_in_list = 0 INTEGER :: myIndex = 0 !Counter and unique number for Data Arrays ! INTERFACE section INTERFACE PMC_ClientInit MODULE procedure PMC_ClientInit END INTERFACE PMC_ClientInit INTERFACE PMC_Set_DataArray_Name MODULE procedure PMC_Set_DataArray_Name MODULE procedure PMC_Set_DataArray_Name_LastEntry END INTERFACE PMC_Set_DataArray_Name INTERFACE PMC_C_Get_2D_index_list MODULE procedure PMC_C_Get_2D_index_list END INTERFACE PMC_C_Get_2D_index_list INTERFACE PMC_C_clear_next_array_list MODULE procedure PMC_C_clear_next_array_list END INTERFACE PMC_C_clear_next_array_list INTERFACE PMC_C_GetNextArray MODULE procedure PMC_C_GetNextArray END INTERFACE PMC_C_GetNextArray INTERFACE PMC_C_Set_DataArray MODULE procedure PMC_C_Set_DataArray_2d MODULE procedure PMC_C_Set_DataArray_3d END INTERFACE PMC_C_Set_DataArray INTERFACE PMC_C_setInd_and_AllocMem MODULE procedure PMC_C_setInd_and_AllocMem END INTERFACE PMC_C_setInd_and_AllocMem INTERFACE PMC_C_GetBuffer MODULE procedure PMC_C_GetBuffer END INTERFACE PMC_C_GetBuffer INTERFACE PMC_C_PutBuffer MODULE procedure PMC_C_PutBuffer END INTERFACE PMC_C_PutBuffer ! Public section PUBLIC PMC_ClientInit , PMC_Set_DataArray_Name, PMC_C_Get_2D_index_list PUBLIC PMC_C_GetNextArray, PMC_C_Set_DataArray, PMC_C_clear_next_array_list PUBLIC PMC_C_setInd_and_AllocMem , PMC_C_GetBuffer, PMC_C_PutBuffer CONTAINS SUBROUTINE PMC_ClientInit IMPLICIT none INTEGER :: i INTEGER :: istat ! Tailor MPI environment me%model_comm = m_model_comm me%inter_comm = m_to_server_comm ! Get rank and size CALL MPI_Comm_rank (me%model_comm, me%model_rank, istat); CALL MPI_Comm_size (me%model_comm, me%model_npes, istat); CALL MPI_Comm_remote_size (me%inter_comm, me%inter_npes, istat); ! intra communicater is used for MPI_Get CALL MPI_Intercomm_merge (me%inter_comm, .true., me%intra_comm, istat); CALL MPI_Comm_rank (me%intra_comm, me%intra_rank, istat); ALLOCATE (me%PEs(me%inter_npes)) ! !-- Allocate for all Server PEs an array of TYPE ArrayDef to store information of transfer array do i=1,me%inter_npes ALLOCATE(me%PEs(i)%array_list(PMC_MAX_ARRAY)) end do if(me%model_rank == 0) write(0,'(a,5i6)') 'PMC_ClientInit ',me%model_rank,me%model_npes,me%inter_npes,me%intra_rank return END SUBROUTINE PMC_ClientInit SUBROUTINE PMC_Set_DataArray_Name (ServerArrayDesc, ServerArrayName, ClientArrayDesc, ClientArrayName, istat) IMPLICIT none character(len=*),INTENT(IN) :: ServerArrayName character(len=*),INTENT(IN) :: ServerArrayDesc character(len=*),INTENT(IN) :: ClientArrayName character(len=*),INTENT(IN) :: ClientArrayDesc INTEGER,INTENT(OUT) :: istat !-- local variables type(DA_NameDef) :: myName INTEGER :: myPe INTEGER :: my_AddiArray=0 istat = PMC_STATUS_OK if(len(trim(ServerArrayName)) > DA_Namelen .or. & len(trim(ClientArrayName)) > DA_Namelen) then !Name too long istat = PMC_DA_NAME_ERR end if if(m_model_rank == 0) then myIndex = myIndex+1 myName%couple_index = myIndex myName%ServerDesc = trim(ServerArrayDesc) myName%NameOnServer = trim(ServerArrayName) myName%ClientDesc = trim(ClientArrayDesc) myName%NameOnClient = trim(ClientArrayName) end if ! Broadcat to all Client PEs CALL PMC_Bcast ( myName%couple_index, 0, comm=m_model_comm) CALL PMC_Bcast ( myName%ServerDesc, 0, comm=m_model_comm) CALL PMC_Bcast ( myName%NameOnServer, 0, comm=m_model_comm) CALL PMC_Bcast ( myName%ClientDesc, 0, comm=m_model_comm) CALL PMC_Bcast ( myName%NameOnClient, 0, comm=m_model_comm) ! Broadcat to all Server PEs if(m_model_rank == 0) then myPE = MPI_ROOT else myPE = MPI_PROC_NULL endif CALL PMC_Bcast ( myName%couple_index, myPE, comm=m_to_server_comm) CALL PMC_Bcast ( myName%ServerDesc, myPE, comm=m_to_server_comm) CALL PMC_Bcast ( myName%NameOnServer, myPE, comm=m_to_server_comm) CALL PMC_Bcast ( myName%ClientDesc, myPE, comm=m_to_server_comm) CALL PMC_Bcast ( myName%NameOnClient, myPE, comm=m_to_server_comm) CALL PMC_G_SetName (me, myName%couple_index, myName%NameOnClient) return END SUBROUTINE PMC_Set_DataArray_Name SUBROUTINE PMC_Set_DataArray_Name_LastEntry (LastEntry) IMPLICIT none LOGICAL,INTENT(IN),optional :: LastEntry !-- local variables type(DA_NameDef) :: myName INTEGER :: myPe myName%couple_index = -1 if(m_model_rank == 0) then myPE = MPI_ROOT else myPE = MPI_PROC_NULL endif CALL PMC_Bcast ( myName%couple_index, myPE, comm=m_to_server_comm) return END SUBROUTINE PMC_Set_DataArray_Name_LastEntry SUBROUTINE PMC_C_Get_2D_index_list IMPLICIT none INTEGER :: i,j,i2,nr,ierr INTEGER :: dummy INTEGER :: indWin !: MPI window object INTEGER :: indWin2 !: MPI window object INTEGER(KIND=MPI_ADDRESS_KIND) :: win_size !: Size of MPI window 1 (in bytes) INTEGER(KIND=MPI_ADDRESS_KIND) :: disp !: Displacement Unit (Integer = 4, floating poit = 8 INTEGER,DIMENSION(me%inter_npes*2) :: NrEle !: Number of Elements of a horizontal slice TYPE(PeDef),POINTER :: aPE !: Pointer to PeDef structure INTEGER(KIND=MPI_ADDRESS_KIND) :: WinSize !: Size of MPI window 2 (in bytes) INTEGER,DIMENSION(:),POINTER :: myInd ! CALL PMC_C_CGet_Rem_index_list win_size = c_sizeof(dummy) CALL MPI_Win_create (dummy, win_size, iwp, MPI_INFO_NULL, me%intra_comm, indWin, ierr); CALL MPI_Win_fence (0, indWin, ierr) ! Open Window on Server side CALL MPI_Win_fence (0, indWin, ierr) ! Close Window on Server Side and opem on Client side do i=1,me%inter_npes disp = me%model_rank*2 CALL MPI_Get (NrEle((i-1)*2+1),2,MPI_INTEGER,i-1,disp,2,MPI_INTEGER,indWin, ierr) end do CALL MPI_Win_fence (0, indWin, ierr) ! MPI_get is Non-blocking -> data in NrEle is not available until MPI_fence CALL WinSize = 0 do i=1,me%inter_npes !Allocate memory for index array aPE => me%PEs(i) i2 = (i-1)*2+1 nr = NrEle(i2+1) if(nr > 0) then ALLOCATE(aPE%locInd(nr)) else NULLIFY (aPE%locInd) endif WinSize = max(nr,WinSize) !Maximum window size end do ALLOCATE(myInd(2*WinSize)) WinSize = 1 ! local Buffer used in MPI_Get can but must not be inside the MPI Window ! Here, we use a dummy for MPI Window because the server PEs do not access the RMA window via MPI_get or MPI_Put CALL MPI_Win_create (dummy, WinSize, iwp, MPI_INFO_NULL, me%intra_comm, indWin2, ierr); CALL MPI_Win_fence (0, indWin2, ierr) ! MPI_get is Non-blocking -> data in NrEle is not available until MPI_fence CALL CALL MPI_Win_fence (0, indWin2, ierr) ! MPI_get is Non-blocking -> data in NrEle is not available until MPI_fence CALL do i=1,me%inter_npes aPE => me%PEs(i) nr = NrEle(i*2) if(nr > 0 ) then disp = NrEle(2*(i-1)+1) CALL MPI_Win_lock (MPI_LOCK_SHARED , i-1, 0, indWin2, ierr) CALL MPI_Get (myInd,2*nr,MPI_INTEGER,i-1,disp,2*nr,MPI_INTEGER,indWin2, ierr) CALL MPI_Win_unlock (i-1, indWin2, ierr) do j=1,nr aPE%locInd(j)%i = myInd(2*j-1) aPE%locInd(j)%j = myInd(2*j) end do aPE%NrEle = nr else aPE%NrEle = -1 end if end do CALL MPI_Barrier(me%intra_comm, ierr) ! Dont know why, but this barrier is necessary before we can free the windows CALL MPI_Win_free(indWin, ierr); CALL MPI_Win_free(indWin2, ierr); DEALLOCATE (myInd) return END SUBROUTINE PMC_C_Get_2D_index_list SUBROUTINE PMC_C_clear_next_array_list IMPLICIT none next_array_in_list = 0 return END SUBROUTINE PMC_C_clear_next_array_list ! List handling is still required to get minimal interaction with pmc_interface LOGICAL function PMC_C_GetNextArray (myName) character(len=*),INTENT(OUT) :: myName !-- local variables TYPE(PeDef),POINTER :: aPE TYPE(ArrayDef),POINTER :: ar next_array_in_list = next_array_in_list+1 !-- Array Names are the same on all client PE, so take first PE to get the name aPE => me%PEs(1) if(next_array_in_list > aPE%Nr_arrays) then PMC_C_GetNextArray = .false. !all arrays done return end if ar => aPE%array_list(next_array_in_list) myName = ar%name PMC_C_GetNextArray = .true. ! Return true if legal array return END function PMC_C_GetNextArray SUBROUTINE PMC_C_Set_DataArray_2d (array) IMPLICIT none REAL(wp), INTENT(IN) ,DIMENSION(:,:) :: array INTEGER :: NrDims INTEGER,DIMENSION (4) :: dims TYPE(c_ptr) :: array_adr INTEGER :: i TYPE(PeDef),POINTER :: aPE TYPE(ArrayDef),POINTER :: ar dims = 1 NrDims = 2 dims(1) = size(array,1) dims(2) = size(array,2) array_adr = c_loc(array) do i=1,me%inter_npes aPE => me%PEs(i) ar => aPE%array_list(next_array_in_list) ar%NrDims = NrDims ar%A_dim = dims ar%data = array_adr end do return END SUBROUTINE PMC_C_Set_DataArray_2d SUBROUTINE PMC_C_Set_DataArray_3d (array) IMPLICIT none REAL(wp),INTENT(IN),DIMENSION(:,:,:) :: array INTEGER :: NrDims INTEGER,DIMENSION (4) :: dims TYPE(c_ptr) :: array_adr INTEGER :: i TYPE(PeDef),POINTER :: aPE TYPE(ArrayDef),POINTER :: ar dims = 1 NrDims = 3 dims(1) = size(array,1) dims(2) = size(array,2) dims(3) = size(array,3) array_adr = c_loc(array) do i=1,me%inter_npes aPE => me%PEs(i) ar => aPE%array_list(next_array_in_list) !actual array is last array in list ar%NrDims = NrDims ar%A_dim = dims ar%data = array_adr end do return END SUBROUTINE PMC_C_Set_DataArray_3d SUBROUTINE PMC_C_setInd_and_AllocMem IMPLICIT none !-- naming convention: appending _sc -> server to client transfer !-- _cs -> client to server transfer !-- Recv -> server to client transfer !-- Send -> client to server transfer INTEGER :: i, istat, ierr, j INTEGER,PARAMETER :: NoINdex=-1 INTEGER :: rcount INTEGER :: arlen, myIndex, tag INTEGER(idp) :: bufsize ! Size of MPI data Window TYPE(PeDef),POINTER :: aPE TYPE(ArrayDef),POINTER :: ar INTEGER,DIMENSION(1024) :: req character(len=DA_Namelen) :: myName Type(c_ptr) :: base_ptr REAL(kind=wp),DIMENSION(:),POINTER,save :: base_array_sc !Base array REAL(kind=wp),DIMENSION(:),POINTER,save :: base_array_cs !Base array INTEGER(KIND=MPI_ADDRESS_KIND) :: WinSize myIndex = 0 bufsize = 8 !-- Server to client direction !-- First stride, Compute size and set index do i=1,me%inter_npes aPE => me%PEs(i) tag = 200 do j=1,aPE%Nr_arrays ar => aPE%array_list(j) ! Receive Index from client tag = tag+1 CALL MPI_Recv (myIndex, 1, MPI_INTEGER, i-1, tag, me%inter_comm, MPI_STATUS_IGNORE, ierr) if(ar%NrDims == 3) then bufsize = max(bufsize,ar%A_dim(1)*ar%A_dim(2)*ar%A_dim(3)) ! determine max, because client buffer is allocated only once else bufsize = max(bufsize,ar%A_dim(1)*ar%A_dim(2)) end if ar%RecvIndex = myIndex end do end do !-- Create RMA (One Sided Communication) data buffer !-- The buffer for MPI_Get can be PE local, i.e. it can but must not be part of the MPI RMA window CALL PMC_Alloc_mem (base_array_sc, bufsize, base_ptr) me%TotalBufferSize = bufsize*wp ! Total buffer size in Byte !-- Second stride, Set Buffer pointer do i=1,me%inter_npes aPE => me%PEs(i) do j=1,aPE%Nr_arrays ar => aPE%array_list(j) ar%RecvBuf = base_ptr end do end do !-- Client to server direction myIndex = 1 rCount = 0 bufsize = 8 do i=1,me%inter_npes aPE => me%PEs(i) tag = 300 do j=1,aPE%Nr_arrays ar => aPE%array_list(j) if(ar%NrDims == 2) then arlen = aPE%NrEle ! 2D else if(ar%NrDims == 3) then arlen = aPE%NrEle*ar%A_dim(1) ! 3D end if tag = tag+1 rCount = rCount+1 if(aPE%NrEle > 0) then CALL MPI_Isend (myIndex, 1, MPI_INTEGER, i-1, tag, me%inter_comm, req(rCount),ierr) ar%SendIndex = myIndex else CALL MPI_Isend (NoIndex, 1, MPI_INTEGER, i-1, tag, me%inter_comm, req(rCount),ierr) ar%SendIndex = NoIndex end if if(rCount == 1024) then ! Maximum of 1024 outstanding requests CALL MPI_Waitall (rCount, req, MPI_STATUSES_IGNORE, ierr) rCount = 0; end if if(aPE%NrEle > 0) then ar%SendSize = arlen myIndex = myIndex+arlen bufsize = bufsize+arlen end if end do if(rCount > 0) then ! Wait for all send completed CALL MPI_Waitall (rCount, req, MPI_STATUSES_IGNORE, ierr) end if end do !-- Create RMA (One Sided Communication) window for data buffer client to server transfer !-- The buffer of MPI_Get (counter part of transfer) can be PE-local, i.e. it can but must not be part of the MPI RMA window !-- Only one RMA window is required to prepare the data for server -> client transfer on the server side and !-- for client -> server transfer on the client side CALL PMC_Alloc_mem (base_array_cs, bufsize) me%TotalBufferSize = bufsize*wp !Total buffer size in Byte WinSize = me%TotalBufferSize CALL MPI_Win_create (base_array_cs, WinSize, wp, MPI_INFO_NULL, me%intra_comm, me%win_server_client, ierr); CALL MPI_Win_fence (0, me%win_server_client, ierr); ! Open Window to set data CALL MPI_Barrier(me%intra_comm, ierr) !-- Second stride, Set Buffer pointer do i=1,me%inter_npes aPE => me%PEs(i) do j=1,aPE%Nr_arrays ar => aPE%array_list(j) if(aPE%NrEle > 0) then ar%SendBuf = c_loc(base_array_cs(ar%SendIndex)) if(ar%SendIndex+ar%SendSize > bufsize) then write(0,'(a,i4,4i7,1x,a)') 'Client Buffer too small ',i,ar%SendIndex,ar%SendSize,ar%SendIndex+ar%SendSize,bufsize,trim(ar%name) CALL MPI_Abort (MPI_COMM_WORLD, istat, ierr) end if end if end do end do return END SUBROUTINE PMC_C_setInd_and_AllocMem SUBROUTINE PMC_C_GetBuffer (WaitTime) IMPLICIT none REAL(wp), INTENT(OUT), optional :: WaitTime !-- local variables INTEGER :: ip, ij, ierr, j INTEGER :: nr ! Number of Elements to getb from server INTEGER :: myIndex REAL(wp) :: t1,t2 TYPE(PeDef),POINTER :: aPE TYPE(ArrayDef),POINTER :: ar INTEGER,DIMENSION(1) :: buf_shape REAL(wp),POINTER,DIMENSION(:) :: buf REAL(wp),POINTER,DIMENSION(:,:) :: data_2d REAL(wp),POINTER,DIMENSION(:,:,:) :: data_3d character(len=DA_Namelen) :: myName INTEGER(kind=MPI_ADDRESS_KIND) :: target_disp t1 = PMC_Time() CALL MPI_Barrier(me%intra_comm, ierr) ! Wait for server to fill buffer t2 = PMC_Time()-t1 if(present(WaitTime)) WaitTime = t2 CALL MPI_Barrier(me%intra_comm, ierr) ! Wait for buffer is filled do ip=1,me%inter_npes aPE => me%PEs(ip) do j=1,aPE%Nr_arrays ar => aPE%array_list(j) if(ar%NrDims == 2) then nr = aPE%NrEle else if(ar%NrDims == 3) then nr = aPE%NrEle*ar%A_dim(1) end if buf_shape(1) = nr CALL c_f_pointer(ar%RecvBuf, buf, buf_shape) ! !-- MPI passive target RMA if(nr > 0) then target_disp = (ar%RecvIndex-1) CALL MPI_Win_lock (MPI_LOCK_SHARED , ip-1, 0, me%win_server_client, ierr) CALL MPI_Get (buf, nr, MPI_REAL, ip-1, target_disp, nr, MPI_REAL, me%win_server_client, ierr) CALL MPI_Win_unlock (ip-1, me%win_server_client, ierr) end if myIndex = 1 if(ar%NrDims == 2) then CALL c_f_pointer(ar%data, data_2d, ar%A_dim(1:2)) do ij=1,aPE%NrEle data_2d(aPE%locInd(ij)%j,aPE%locInd(ij)%i) = buf(myIndex) myIndex = myIndex+1 end do else if(ar%NrDims == 3) then CALL c_f_pointer(ar%data, data_3d, ar%A_dim(1:3)) do ij=1,aPE%NrEle data_3d(:,aPE%locInd(ij)%j,aPE%locInd(ij)%i) = buf(myIndex:myIndex+ar%A_dim(1)-1) myIndex = myIndex+ar%A_dim(1) end do end if end do end do return END SUBROUTINE PMC_C_GetBuffer SUBROUTINE PMC_C_PutBuffer (WaitTime) IMPLICIT none REAL(wp), INTENT(OUT), optional :: WaitTime !-- local variables INTEGER :: ip, ij, ierr, j INTEGER :: nr ! Number of Elements to getb from server INTEGER :: myIndex REAL(wp) :: t1,t2 TYPE(PeDef),POINTER :: aPE TYPE(ArrayDef),POINTER :: ar INTEGER,DIMENSION(1) :: buf_shape REAL(wp),POINTER,DIMENSION(:) :: buf REAL(wp),POINTER,DIMENSION(:,:) :: data_2d REAL(wp),POINTER,DIMENSION(:,:,:) :: data_3d character(len=DA_Namelen) :: myName INTEGER(kind=MPI_ADDRESS_KIND) :: target_disp t1 = PMC_Time() CALL MPI_Barrier(me%intra_comm, ierr) ! Wait for empty buffer t2 = PMC_Time() if(present(WaitTime)) WaitTime = t2-t1 do ip=1,me%inter_npes aPE => me%PEs(ip) do j=1,aPE%Nr_arrays ar => aPE%array_list(j) myIndex=1 if(ar%NrDims == 2) then buf_shape(1) = aPE%NrEle CALL c_f_pointer(ar%SendBuf, buf, buf_shape) CALL c_f_pointer(ar%data, data_2d, ar%A_dim(1:2)) do ij=1,aPE%NrEle buf(myIndex) = data_2d(aPE%locInd(ij)%j,aPE%locInd(ij)%i) myIndex = myIndex+1 end do else if(ar%NrDims == 3) then buf_shape(1) = aPE%NrEle*ar%A_dim(1) CALL c_f_pointer(ar%SendBuf, buf, buf_shape) CALL c_f_pointer(ar%data, data_3d, ar%A_dim(1:3)) do ij=1,aPE%NrEle buf(myIndex:myIndex+ar%A_dim(1)-1) = data_3d(:,aPE%locInd(ij)%j,aPE%locInd(ij)%i) myIndex = myIndex+ar%A_dim(1) end do end if end do end do ! CALL MPI_Win_fence (0, me%win_server_client, ierr) ! Fence might do it, test later CALL MPI_Barrier(me%intra_comm, ierr) ! buffer is filled return END SUBROUTINE PMC_C_PutBuffer #endif END MODULE pmc_client