C $Header: /u/gcmpack/MITgcm/pkg/flt/exch_recv_get_vec.F,v 1.10 2012/09/06 16:13:53 jmc Exp $
C $Name:  $

#include "CPP_EEOPTIONS.h"
#undef EXCH_USE_SPINNING
#undef DBUG_EXCH_VEC

C--   Contents
C--   o EXCH_RECV_GET_VEC_X_RL
C--   o EXCH_RECV_GET_VEC_Y_RL

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP 0
C !ROUTINE: EXCH_RECV_GET_VEC_X_RL

C !INTERFACE:
      SUBROUTINE EXCH_RECV_GET_VEC_X_RL(
     U                        arrayE, arrayW,
     I                        myd1, myThid )
C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE EXCH_RECV_GET_VEC_X_RL
C     | o "Receive" or "Get" X edges for RL array.
C     *==========================================================*
C     | Routine that invokes actual message passing receive
C     | of data to update buffer in X direction
C     *==========================================================*

C     !USES:
      IMPLICIT NONE

C     == Global variables ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "EXCH.h"

C     !INPUT/OUTPUT PARAMETERS:
C     arrayE        :: buffer array to collect Eastern Neighbour values
C     arrayW        :: buffer array to collect Western Neighbour values
C     myd1          :: size
C     myThid        :: my Thread Id. number
      INTEGER myd1
      _RL arrayE(myd1, nSx, nSy), arrayW(myd1, nSx, nSy)
      INTEGER myThid
CEOP

C     !LOCAL VARIABLES:
C     bi, bj        :: tile indices
C     biW, bjW      :: West tile indices
C     biE, bjE      :: East tile indices
C     theProc       :: Variables used in message building
C     theTag        :: Variables used in message building
C     theType       :: Variables used in message building
C     theSize       :: Variables used in message building
C     westCommMode  :: variables holding type of communication
C     eastCommMode  ::  a particular tile face uses.
      INTEGER bi, bj
c     INTEGER biW, bjW, biE, bjE
      INTEGER westCommMode
      INTEGER eastCommMode
      INTEGER ioUnit
#ifdef EXCH_USE_SPINNING
      INTEGER spinCount
#endif
#ifdef ALLOW_USE_MPI
      INTEGER theProc, theTag, theType, theSize
      INTEGER mpiStatus(MPI_STATUS_SIZE,4), mpiRc
#endif

C--   Under a "put" scenario we
C--     i. set completetion signal for buffer we put into.
C--    ii. wait for completetion signal indicating data has been put in
C--        our buffer.
C--   Under a messaging mode we "receive" the message.
C--   Under a "get" scenario <= not implemented, we
C--     i. Check that the data is ready.
C--    ii. Read the data.
C--   iii. Set data read flag + memory sync.

      ioUnit = errorMessageUnit

      _BEGIN_MASTER(myThid)

      DO bj=1,nSy
       DO bi=1,nSx
        westCommMode  = _tileCommModeW(bi,bj)
        eastCommMode  = _tileCommModeE(bi,bj)
#ifdef DBUG_EXCH_VEC
        write(ioUnit,'(A,5I6)') 'RECV_X,0 :',myProcId,bi,bj
#endif
c       biE =  _tileBiE(bi,bj)
c       bjE =  _tileBjE(bi,bj)
c       biW =  _tileBiW(bi,bj)
c       bjW =  _tileBjW(bi,bj)
        IF ( westCommMode .EQ. COMM_MSG ) THEN
#ifdef ALLOW_USE_MPI
         IF ( usingMPI ) THEN
          theProc = tilePidW(bi,bj)
          theTag  = _tileTagRecvW(bi,bj)
          theType = _MPI_TYPE_RL
          theSize = myd1
#ifdef DBUG_EXCH_VEC
          write(ioUnit,'(A,5I5,I8)') 'qq2xW: ',myProcId,bi,bj,
     &          theProc,theTag,theSize
#endif
          CALL MPI_RECV( arrayW(1,bi,bj), theSize, theType,
     &                   theProc, theTag, MPI_COMM_MODEL,
     &                   mpiStatus, mpiRc )
c         if (theProc .eq. 0 .or. theProc .eq. 2) then
c         if (arrayW(1,bi,bj) .ne. 0.) then
c            write(errormessageunit,*) 'qq2y: ',myProcId,
c     &      theProc,theTag,theSize,(arrayW(i,bi,bj),i=1,32)
c         else
c            write(errormessageunit,*) 'qq2n: ',myProcId,
c     &      theProc,theTag,theSize,(arrayW(i,bi,bj),i=1,32)
c         endif
c         endif
         ENDIF
#endif /* ALLOW_USE_MPI */
        ENDIF
#ifdef DBUG_EXCH_VEC
        write(ioUnit,'(A,5I6)') 'RECV_X,1 :',myProcId,bi,bj
#endif

        IF ( eastCommMode .EQ. COMM_MSG ) THEN
#ifdef ALLOW_USE_MPI
         IF ( usingMPI ) THEN
          theProc = tilePidE(bi,bj)
          theTag  = _tileTagRecvE(bi,bj)
          theType = _MPI_TYPE_RL
          theSize = myd1
#ifdef DBUG_EXCH_VEC
          write(ioUnit,'(A,5I5,I8)') 'qq2xE: ',myProcId,bi,bj,
     &          theProc,theTag,theSize
#endif
          CALL MPI_RECV( arrayE(1,bi,bj), theSize, theType,
     &                   theProc, theTag, MPI_COMM_MODEL,
     &                   mpiStatus, mpiRc )
         ENDIF
#endif /* ALLOW_USE_MPI */
        ENDIF
#ifdef DBUG_EXCH_VEC
        write(ioUnit,'(A,5I6)') 'RECV_X,2 :',myProcId,bi,bj
#endif
       ENDDO
      ENDDO
#ifdef DBUG_EXCH_VEC
      write(ioUnit,'(A,5I6,I12)') 'RECV_X:',myProcId
#endif

      IF ( .NOT.exchUsesBarrier  ) THEN
       DO bj=1,nSy
        DO bi=1,nSx
         IF ( exchNReqsX(1,bi,bj) .GT. 0 ) THEN
#ifdef ALLOW_USE_MPI
          IF ( usingMPI )
     &    CALL MPI_WAITALL( exchNReqsX(1,bi,bj), exchReqIdX(1,1,bi,bj),
     &                      mpiStatus, mpiRC )
#endif /* ALLOW_USE_MPI */
         ENDIF
C        Clear outstanding requests counter
         exchNReqsX(1,bi,bj) = 0
        ENDDO
       ENDDO
      ENDIF

      _END_MASTER(myThid)

C--   need to sync threads after master has received data ;
C     (done after mpi waitall in case waitall is really needed)
      _BARRIER

C--   Threads wait for buffers I am going to read to be ready.
C     note: added BARRIER in exch_send_put S/R and here above (message
C     mode) so that we no longer needs this (undef EXCH_USE_SPINNING)
#ifdef EXCH_USE_SPINNING
      IF ( exchUsesBarrier  ) THEN
C      o On some machines ( T90 ) use system barrier rather than spinning.
       CALL BARRIER( myThid )
      ELSE
C      o Spin waiting for completetion flag. This avoids a global-lock
C        i.e. we only lock waiting for data that we need.
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         spinCount = 0
         westCommMode = _tileCommModeW(bi,bj)
         eastCommMode = _tileCommModeE(bi,bj)
#ifdef DBUG_EXCH_VEC
          write(ioUnit,'(A,5I6,I12)') 'spin:', myProcId,bi,bj,
     &          westRecvAck(1,bi,bj), eastRecvAck(1,bi,bj), spinCount
#endif
   10    CONTINUE
          CALL FOOL_THE_COMPILER( spinCount )
          spinCount = spinCount+1
#ifdef DBUG_EXCH_VEC
          write(ioUnit,'(A,5I6,I12)') 'spin:', myProcId,bi,bj,
     &          westRecvAck(1,bi,bj), eastRecvAck(1,bi,bj), spinCount
          IF ( myThid.EQ.1 .AND. spinCount.GT. _EXCH_SPIN_LIMIT ) THEN
           STOP ' S/R EXCH_RECV_GET_X: spinCount > _EXCH_SPIN_LIMIT'
          ENDIF
#endif
          IF ( westRecvAck(1,bi,bj) .EQ. 0 ) GOTO 10
          IF ( eastRecvAck(1,bi,bj) .EQ. 0 ) GOTO 10
C        Clear outstanding requests
         westRecvAck(1,bi,bj) = 0
         eastRecvAck(1,bi,bj) = 0
        ENDDO
       ENDDO
      ENDIF
#endif /* EXCH_USE_SPINNING */

      RETURN
      END


C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| CBOP 0 C !ROUTINE: EXCH_RECV_GET_VEC_Y_RL C !INTERFACE: SUBROUTINE EXCH_RECV_GET_VEC_Y_RL( U arrayN, arrayS, I myd1, myThid ) C !DESCRIPTION: C *==========================================================* C | SUBROUTINE EXCH_RECV_GET_VEC_Y_RL C | o "Receive" or "Get" Y edges for RL array. C *==========================================================* C | Routine that invokes actual message passing receive C | of data to update buffer in Y direction C *==========================================================* C !USES: IMPLICIT NONE C == Global variables == #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" #include "EXCH.h" C !INPUT/OUTPUT PARAMETERS: C arrayN :: buffer array to collect Northern Neighbour values C arrayS :: buffer array to collect Southern Neighbour values C myd1 :: size C myThid :: my Thread Id. number INTEGER myd1 _RL arrayN(myd1, nSx, nSy), arrayS(myd1, nSx, nSy) INTEGER myThid CEOP C !LOCAL VARIABLES: C bi, bj :: tile indices C biS, bjS :: South tile indices C biN, bjN :: North tile indices C theProc :: Variables used in message building C theTag :: Variables used in message building C theType :: Variables used in message building C theSize :: Variables used in message building C southCommMode :: variables holding type of communication C northCommMode :: a particular tile face uses. INTEGER bi, bj c INTEGER biS, bjS, biN, bjN INTEGER southCommMode INTEGER northCommMode INTEGER ioUnit #ifdef EXCH_USE_SPINNING INTEGER spinCount #endif #ifdef ALLOW_USE_MPI INTEGER theProc, theTag, theType, theSize INTEGER mpiStatus(MPI_STATUS_SIZE,4), mpiRc #endif C-- Under a "put" scenario we C-- i. set completetion signal for buffer we put into. C-- ii. wait for completetion signal indicating data has been put in C-- our buffer. C-- Under a messaging mode we "receive" the message. C-- Under a "get" scenario <= not implemented, we C-- i. Check that the data is ready. C-- ii. Read the data. C-- iii. Set data read flag + memory sync. ioUnit = errorMessageUnit _BEGIN_MASTER(myThid) DO bj=1,nSy DO bi=1,nSx southCommMode = _tileCommModeS(bi,bj) northCommMode = _tileCommModeN(bi,bj) #ifdef DBUG_EXCH_VEC write(ioUnit,'(A,5I6)') 'RECV_Y,0 :',myProcId,bi,bj #endif c biN = _tileBiN(bi,bj) c bjN = _tileBjN(bi,bj) c biS = _tileBiS(bi,bj) c bjS = _tileBjS(bi,bj) IF ( southCommMode .EQ. COMM_MSG ) THEN #ifdef ALLOW_USE_MPI IF ( usingMPI ) THEN theProc = tilePidS(bi,bj) theTag = _tileTagRecvS(bi,bj) theType = _MPI_TYPE_RL theSize = myd1 CALL MPI_RECV( arrayS(1,bi,bj), theSize, theType, & theProc, theTag, MPI_COMM_MODEL, & mpiStatus, mpiRc ) ENDIF #endif /* ALLOW_USE_MPI */ ENDIF #ifdef DBUG_EXCH_VEC write(ioUnit,'(A,5I6)') 'RECV_Y,1 :',myProcId,bi,bj #endif IF ( northCommMode .EQ. COMM_MSG ) THEN #ifdef ALLOW_USE_MPI IF ( usingMPI ) THEN theProc = tilePidN(bi,bj) theTag = _tileTagRecvN(bi,bj) theType = _MPI_TYPE_RL theSize = myd1 CALL MPI_RECV( arrayN(1,bi,bj), theSize, theType, & theProc, theTag, MPI_COMM_MODEL, & mpiStatus, mpiRc ) ENDIF #endif /* ALLOW_USE_MPI */ ENDIF #ifdef DBUG_EXCH_VEC write(ioUnit,'(A,5I6)') 'RECV_Y,2 :',myProcId,bi,bj #endif ENDDO ENDDO #ifdef DBUG_EXCH_VEC write(ioUnit,'(A,5I6,I12)') 'RECV_Y:',myProcId #endif C-- Processes wait for buffers I am going to read to be ready. IF ( .NOT.exchUsesBarrier ) THEN DO bj=1,nSy DO bi=1,nSx IF ( exchNReqsY(1,bi,bj) .GT. 0 ) THEN #ifdef ALLOW_USE_MPI IF ( usingMPI ) & CALL MPI_WAITALL( exchNReqsY(1,bi,bj), exchReqIdY(1,1,bi,bj), & mpiStatus, mpiRC ) #endif /* ALLOW_USE_MPI */ ENDIF C Clear outstanding requests counter exchNReqsY(1,bi,bj) = 0 ENDDO ENDDO ENDIF _END_MASTER(myThid) C-- need to sync threads after master has received data ; C (done after mpi waitall in case waitall is really needed) _BARRIER C-- Threads wait for buffers I am going to read to be ready. C note: added BARRIER in exch_send_put S/R and here above (message C mode) so that we no longer needs this (undef EXCH_USE_SPINNING) #ifdef EXCH_USE_SPINNING IF ( exchUsesBarrier ) THEN C o On some machines ( T90 ) use system barrier rather than spinning. CALL BARRIER( myThid ) ELSE C o Spin waiting for completetion flag. This avoids a global-lock C i.e. we only lock waiting for data that we need. DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) spinCount = 0 southCommMode = _tileCommModeS(bi,bj) northCommMode = _tileCommModeN(bi,bj) #ifdef DBUG_EXCH_VEC write(ioUnit,'(A,5I6,I12)') 'spin:', myProcId,bi,bj, & southRecvAck(1,bi,bj), northRecvAck(1,bi,bj), spinCount #endif 10 CONTINUE CALL FOOL_THE_COMPILER( spinCount ) spinCount = spinCount+1 #ifdef DBUG_EXCH_VEC write(ioUnit,'(A,5I6,I12)') 'spin:', myProcId,bi,bj, & southRecvAck(1,bi,bj), northRecvAck(1,bi,bj), spinCount IF ( myThid.EQ.1 .AND. spinCount.GT. _EXCH_SPIN_LIMIT ) THEN STOP ' S/R EXCH_RECV_GET_X: spinCount > _EXCH_SPIN_LIMIT' ENDIF #endif IF ( southRecvAck(1,bi,bj) .EQ. 0 ) GOTO 10 IF ( northRecvAck(1,bi,bj) .EQ. 0 ) GOTO 10 C Clear outstanding requests southRecvAck(1,bi,bj) = 0 northRecvAck(1,bi,bj) = 0 ENDDO ENDDO ENDIF #endif /* EXCH_USE_SPINNING */ RETURN END