C $Header: /u/gcmpack/MITgcm/pkg/flt/exch_send_put_vec.F,v 1.9 2012/09/06 16:13:53 jmc Exp $
C $Name: $
#include "CPP_EEOPTIONS.h"
#undef DBUG_EXCH_VEC
C-- Contents
C-- o EXCH_SEND_PUT_VEC_X_RL
C-- o EXCH_SEND_PUT_VEC_Y_RL
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP 0
C !ROUTINE: EXCH_SEND_PUT_VEC_X_RL
C !INTERFACE:
SUBROUTINE EXCH_SEND_PUT_VEC_X_RL(
I arrayE, arrayW,
O bufRecE, bufRecW,
I myd1, myThid )
C !DESCRIPTION:
C *==========================================================*
C | SUBROUTINE EXCH_SEND_PUT_VEC_X_RL
C | o "Send" or "put" X edges for RL array.
C *==========================================================*
C | Routine that invokes actual message passing send or
C | direct "put" of data to update buffer in X direction
C | Note: Since only master-thread send/put, assumes input
C | & output arrays are shared (i.e. incommon block)
C *==========================================================*
C !USES:
IMPLICIT NONE
C == Global variables ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "EXCH.h"
C !INPUT/OUTPUT PARAMETERS:
C arrayE :: Input buffer array to send to Eastern Neighbour
C arrayW :: Input buffer array to send to Western Neighbour
C bufRecE :: buffer array to collect Eastern Neighbour values
C bufRecW :: buffer array to collect Western Neighbour values
C myd1 :: size
C myThid :: my Thread Id. number
INTEGER myd1
_RL arrayE(myd1,nSx,nSy), arrayW(myd1,nSx,nSy)
_RL bufRecE(myd1,nSx,nSy), bufRecW(myd1,nSx,nSy)
INTEGER myThid
CEOP
C !LOCAL VARIABLES:
C I :: Loop counters
C bi, bj :: tile indices
C biW, bjW :: West tile indices
C biE, bjE :: East tile indices
C theProc :: Variables used in message building
C theTag :: Variables used in message building
C theType :: Variables used in message building
C theSize :: Variables used in message building
C westCommMode :: variables holding type of communication
C eastCommMode :: a particular tile face uses.
INTEGER I
INTEGER bi, bj, biW, bjW, biE, bjE
INTEGER westCommMode
INTEGER eastCommMode
#ifdef ALLOW_USE_MPI
INTEGER theProc, theTag, theType, theSize, mpiRc
#endif
#ifdef DBUG_EXCH_VEC
INTEGER ioUnit
#endif
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
C-- Write data to exchange buffer
C Various actions are possible depending on the communication mode
C as follows:
C Mode Action
C -------- ---------------------------
C COMM_NONE Do nothing
C
C COMM_MSG Message passing communication ( e.g. MPI )
C Fill west send buffer from this tile.
C Send data with tag identifying tile and direction.
C Fill east send buffer from this tile.
C Send data with tag identifying tile and direction.
C
C COMM_PUT "Put" communication ( UMP_, shmemput, etc... )
C Fill east receive buffer of west-neighbor tile
C Fill west receive buffer of east-neighbor tile
C Sync. memory
C Write data-ready Ack for east edge of west-neighbor tile
C Write data-ready Ack for west edge of east-neighbor tile
C Sync. memory
C Prevent anyone to access shared buffer while an other thread modifies it
_BARRIER
_BEGIN_MASTER(myThid)
#ifdef DBUG_EXCH_VEC
ioUnit = errorMessageUnit
WRITE(ioUnit,'(A,2L5)')
& 'SEND_PUT_X: exchNeedsMemsync,exchUsesBarrier=',
& exchNeedsMemsync,exchUsesBarrier
#endif
DO bj=1,nSy
DO bi=1,nSx
westCommMode = _tileCommModeW(bi,bj)
eastCommMode = _tileCommModeE(bi,bj)
biE = _tileBiE(bi,bj)
bjE = _tileBjE(bi,bj)
biW = _tileBiW(bi,bj)
bjW = _tileBjW(bi,bj)
C o Send or Put west edge
IF ( westCommMode .EQ. COMM_MSG ) THEN
C Send the data
#ifdef ALLOW_USE_MPI
IF ( usingMPI ) THEN
theProc = tilePidW(bi,bj)
theTag = _tileTagSendW(bi,bj)
theSize = myd1
theType = _MPI_TYPE_RL
exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1
#ifdef DBUG_EXCH_VEC
write(ioUnit,'(A,7I5,I8)') 'qq1xW: ',myProcId,bi,bj,
& theProc,theTag, exchNReqsX(1,bi,bj),
& exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), theSize
#endif
CALL MPI_ISEND( arrayW(1,bi,bj), theSize, theType,
& theProc, theTag, MPI_COMM_MODEL,
& exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj),
& mpiRc )
ENDIF
#endif /* ALLOW_USE_MPI */
eastRecvAck(1,biW,bjW) = 1
ELSEIF ( westCommMode .EQ. COMM_PUT ) THEN
c write(0,*) 'SEND_PUT_VEC_X: copy E:',biW,bjW,' <- W:',bi,bj
DO I=1,myd1
bufRecE(I,biW,bjW) = arrayW(I,bi,bj)
ENDDO
ELSEIF ( westCommMode .NE. COMM_NONE ) THEN
STOP ' S/R EXCH: Invalid commW mode.'
ENDIF
C o Send or Put east edge
IF ( eastCommMode .EQ. COMM_MSG ) THEN
C Send the data
#ifdef ALLOW_USE_MPI
IF ( usingMPI ) THEN
theProc = tilePidE(bi,bj)
theTag = _tileTagSendE(bi,bj)
theSize = myd1
theType = _MPI_TYPE_RL
exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1
#ifdef DBUG_EXCH_VEC
c if (theProc .eq. 2 .or. theProc .eq. 4) then
c if (arrayE(1,bi,bj) .ne. 0.) then
write(ioUnit,'(A,7I5,I8)') 'qq1xE: ',myProcId,bi,bj,
& theProc,theTag, exchNReqsX(1,bi,bj),
& exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), theSize
c endif
c endif
#endif
CALL MPI_ISEND( arrayE(1,bi,bj), theSize, theType,
& theProc, theTag, MPI_COMM_MODEL,
& exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj),
& mpiRc)
ENDIF
#endif /* ALLOW_USE_MPI */
westRecvAck(1,biE,bjE) = 1
ELSEIF ( eastCommMode .EQ. COMM_PUT ) THEN
c write(0,*) 'SEND_PUT_VEC_X: copy W:',biE,bjE,' <- E:',bi,bj
DO I=1,myd1
bufRecW(I,biE,bjE) = arrayE(I,bi,bj)
ENDDO
ELSEIF ( eastCommMode .NE. COMM_NONE ) THEN
STOP ' S/R EXCH: Invalid commE mode.'
ENDIF
ENDDO
ENDDO
_END_MASTER(myThid)
C-- Signal completetion ( making sure system-wide memory state is
C-- consistent ).
C ** NOTE ** We are relying on being able to produce strong-ordered
C memory semantics here. In other words we assume that there is a
C mechanism which can ensure that by the time the Ack is seen the
C overlap region data that will be exchanged is up to date.
IF ( exchNeedsMemSync ) CALL MEMSYNC
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
biE = _tileBiE(bi,bj)
bjE = _tileBjE(bi,bj)
biW = _tileBiW(bi,bj)
bjW = _tileBjW(bi,bj)
westCommMode = _tileCommModeW(bi,bj)
eastCommMode = _tileCommModeE(bi,bj)
IF ( westCommMode .EQ. COMM_PUT ) eastRecvAck(1,biW,bjW) = 1
IF ( eastCommMode .EQ. COMM_PUT ) westRecvAck(1,biE,bjE) = 1
IF ( westCommMode .EQ. COMM_GET ) eastRecvAck(1,biW,bjW) = 1
IF ( eastCommMode .EQ. COMM_GET ) westRecvAck(1,biE,bjE) = 1
ENDDO
ENDDO
C-- Make sure "ack" setting is seen system-wide.
C Here strong-ordering is not an issue but we want to make
C sure that processes that might spin on the above Ack settings
C will see the setting.
C ** NOTE ** On some machines we wont spin on the Ack setting
C ( particularly the T90 ), instead we will use s system barrier.
C On the T90 the system barrier is very fast and switches out the
C thread while it waits. On most machines the system barrier
C is much too slow and if we own the machine and have one thread
C per process preemption is not a problem.
IF ( exchNeedsMemSync ) CALL MEMSYNC
C Wait until all threads finish filling buffer <-- jmc: really needed ?
_BARRIER
RETURN
END
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP 0
C !ROUTINE: EXCH_SEND_PUT_VEC_Y_RL
C !INTERFACE:
SUBROUTINE EXCH_SEND_PUT_VEC_Y_RL(
I arrayN, arrayS,
O bufRecN, bufRecS,
I myd1, myThid )
C !DESCRIPTION:
C *==========================================================*
C | SUBROUTINE EXCH_SEND_PUT_VEC_Y_RL
C | o "Send" or "put" Y edges for RL array.
C *==========================================================*
C | Routine that invokes actual message passing send or
C | direct "put" of data to update buffer in X direction
C | Note: Since only master-thread send/put, assumes input
C | & output arrays are shared (i.e. incommon block)
C *==========================================================*
C !USES:
IMPLICIT NONE
C == Global variables ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "EXCH.h"
C !INPUT/OUTPUT PARAMETERS:
C arrayN :: buffer array to collect Northern Neighbour values
C arrayS :: buffer array to collect Southern Neighbour values
C myd1 :: size
C myThid :: my Thread Id. number
C arrayN :: Input buffer array to send to Northern Neighbour
C arrayS :: Input buffer array to send to Southern Neighbour
C bufRecN :: buffer array to collect Northern Neighbour values
C bufRecS :: buffer array to collect Southern Neighbour values
C myd1 :: size
C myThid :: my Thread Id. number
INTEGER myd1
_RL arrayN(myd1,nSx,nSy), arrayS(myd1,nSx,nSy)
_RL bufRecN(myd1,nSx,nSy), bufRecS(myd1,nSx,nSy)
INTEGER myThid
CEOP
C !LOCAL VARIABLES:
C I :: Loop index
C bi, bj :: tile indices
C biS, bjS :: South tile indices
C biN, bjN :: North tile indices
C theProc :: Variables used in message building
C theTag :: Variables used in message building
C theType :: Variables used in message building
C theSize :: Variables used in message building
C southCommMode :: variables holding type of communication
C northCommMode :: a particular tile face uses.
INTEGER I
INTEGER bi, bj, biS, bjS, biN, bjN
INTEGER southCommMode
INTEGER northCommMode
#ifdef ALLOW_USE_MPI
INTEGER theProc, theTag, theType, theSize, mpiRc
#endif
#ifdef DBUG_EXCH_VEC
INTEGER ioUnit
#endif
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
C-- Write data to exchange buffer
C Various actions are possible depending on the communication mode
C as follows:
C Mode Action
C -------- ---------------------------
C COMM_NONE Do nothing
C
C COMM_MSG Message passing communication ( e.g. MPI )
C Fill west send buffer from this tile.
C Send data with tag identifying tile and direction.
C Fill east send buffer from this tile.
C Send data with tag identifying tile and direction.
C
C COMM_PUT "Put" communication ( UMP_, shmemput, etc... )
C Fill east receive buffer of south-neighbor tile
C Fill west receive buffer of north-neighbor tile
C Sync. memory
C Write data-ready Ack for east edge of south-neighbor tile
C Write data-ready Ack for west edge of north-neighbor tile
C Sync. memory
C Prevent anyone to access shared buffer while an other thread modifies it
_BARRIER
_BEGIN_MASTER(myThid)
#ifdef DBUG_EXCH_VEC
ioUnit = errorMessageUnit
#endif
DO bj=1,nSy
DO bi=1,nSx
southCommMode = _tileCommModeS(bi,bj)
northCommMode = _tileCommModeN(bi,bj)
biN = _tileBiN(bi,bj)
bjN = _tileBjN(bi,bj)
biS = _tileBiS(bi,bj)
bjS = _tileBjS(bi,bj)
C o Send or Put south edge
IF ( southCommMode .EQ. COMM_MSG ) THEN
C Send the data
#ifdef ALLOW_USE_MPI
IF ( usingMPI ) THEN
theProc = tilePidS(bi,bj)
theTag = _tileTagSendS(bi,bj)
theSize = myd1
theType = _MPI_TYPE_RL
exchNReqsY(1,bi,bj) = exchNReqsY(1,bi,bj)+1
CALL MPI_ISEND( arrayS(1,bi,bj), theSize, theType,
& theProc, theTag, MPI_COMM_MODEL,
& exchReqIdY(exchNReqsY(1,bi,bj),1,bi,bj),
& mpiRc )
ENDIF
#endif /* ALLOW_USE_MPI */
northRecvAck(1,biS,bjS) = 1
ELSEIF ( southCommMode .EQ. COMM_PUT ) THEN
c write(0,*) 'SEND_PUT_VEC_Y: copy N:',biS,bjS,' <- S:',bi,bj
DO I=1,myd1
bufRecN(I,biS,bjS) = arrayS(I,bi,bj)
ENDDO
ELSEIF ( southCommMode .NE. COMM_NONE ) THEN
STOP ' S/R EXCH: Invalid commS mode.'
ENDIF
C o Send or Put north edge
IF ( northCommMode .EQ. COMM_MSG ) THEN
C Send the data
#ifdef ALLOW_USE_MPI
IF ( usingMPI ) THEN
theProc = tilePidN(bi,bj)
theTag = _tileTagSendN(bi,bj)
theSize = myd1
theType = _MPI_TYPE_RL
exchNReqsY(1,bi,bj) = exchNReqsY(1,bi,bj)+1
CALL MPI_ISEND( arrayN(1,bi,bj), theSize, theType,
& theProc, theTag, MPI_COMM_MODEL,
& exchReqIdY(exchNReqsY(1,bi,bj),1,bi,bj),
& mpiRc )
ENDIF
#endif /* ALLOW_USE_MPI */
southRecvAck(1,biN,bjN) = 1
ELSEIF ( northCommMode .EQ. COMM_PUT ) THEN
c write(0,*) 'SEND_PUT_VEC_Y: copy S:',biN,bjN,' <- N:',bi,bj
DO I=1,myd1
bufRecS(I,biN,bjN) = arrayN(I,bi,bj)
ENDDO
ELSEIF ( northCommMode .NE. COMM_NONE ) THEN
STOP ' S/R EXCH: Invalid commN mode.'
ENDIF
ENDDO
ENDDO
_END_MASTER(myThid)
C-- Signal completetion ( making sure system-wide memory state is
C-- consistent ).
C ** NOTE ** We are relying on being able to produce strong-ordered
C memory semantics here. In other words we assume that there is a
C mechanism which can ensure that by the time the Ack is seen the
C overlap region data that will be exchanged is up to date.
IF ( exchNeedsMemSync ) CALL MEMSYNC
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
biN = _tileBiN(bi,bj)
bjN = _tileBjN(bi,bj)
biS = _tileBiS(bi,bj)
bjS = _tileBjS(bi,bj)
southCommMode = _tileCommModeS(bi,bj)
northCommMode = _tileCommModeN(bi,bj)
IF ( southCommMode .EQ. COMM_PUT ) northRecvAck(1,biS,bjS) = 1
IF ( northCommMode .EQ. COMM_PUT ) southRecvAck(1,biN,bjN) = 1
IF ( southCommMode .EQ. COMM_GET ) northRecvAck(1,biS,bjS) = 1
IF ( northCommMode .EQ. COMM_GET ) southRecvAck(1,biN,bjN) = 1
ENDDO
ENDDO
C-- Make sure "ack" setting is seen system-wide.
C Here strong-ordering is not an issue but we want to make
C sure that processes that might spin on the above Ack settings
C will see the setting.
C ** NOTE ** On some machines we wont spin on the Ack setting
C ( particularly the T90 ), instead we will use s system barrier.
C On the T90 the system barrier is very fast and switches out the
C thread while it waits. On most machines the system barrier
C is much too slow and if we own the machine and have one thread
C per process preemption is not a problem.
IF ( exchNeedsMemSync ) CALL MEMSYNC
C Wait until all threads finish filling buffer <-- jmc: really needed ?
_BARRIER
RETURN
END