C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_singlecpu.F,v 1.5 2012/09/03 19:36:29 jmc Exp $
C $Name: $
#include "PACKAGES_CONFIG.h"
#include "CPP_EEOPTIONS.h"
C-- File global_sum_singlecpu.F: Routines that perform global sum
C on a single CPU
C Contents
C o GLOBAL_SUM_SINGLECPU_RL
C o GLOBAL_SUM_SINGLECPU_RS <- not yet coded
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C !ROUTINE: GLOBAL_SUM_SINGLECPU_RL
C !INTERFACE:
      SUBROUTINE GLOBAL_SUM_SINGLECPU_RL(
     I                       phiLocal,
     O                       sumPhi,
     I                       oLi, oLj, myThid )
      IMPLICIT NONE
C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE GLOBAL\_SUM\_SINGLECPU\_RL
C     | o Handle sum for _RL data.
C     *==========================================================*
C     | Global sum of 2d array
C     | independent of tiling as sum is performed on a single CPU
C     | sum is performed in REAL*8
C     | Gathering the full field onto one process and summing in a
C     | fixed order makes the result independent of the domain
C     | decomposition (nPx,nPy,nSx,nSy), at the cost of a gather
C     | plus a scalar broadcast on every call.
C     *==========================================================*
C     !USES:
C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"
#ifdef ALLOW_EXCH2
#include "W2_EXCH2_SIZE.h"
#include "W2_EXCH2_TOPOLOGY.h"
#endif
#include "EEBUFF_SCPU.h"
C     !INPUT/OUTPUT PARAMETERS:
C     == Routine arguments ==
C     phiLocal :: local input array without overlap regions.
C     sumPhi   :: Result of sum.
C     oLi, oLj :: overlap size of input array in I & J direction.
C     myThid   :: My thread id.
      INTEGER oLi, oLj
      _RL     phiLocal(1-oLi:sNx+oLi,1-oLj:sNy+oLj,nSx,nSy)
      _RL     sumPhi
      INTEGER myThid

C     !LOCAL VARIABLES:
C     == Local variables ==
C-    type declaration of: sumAll, globalBuf :
C     sumAll needs to have the same length as MPI_DOUBLE_PRECISION
C     (it is sent/received as a single MPI_DOUBLE_PRECISION scalar)
C     zeroBuff, useExch2GlobLayOut :: passed to GATHER_2D_R8 to select
C                  the EXCH2 global-map layout (see #ifdef below)
C     xSize, ySize :: dimensions of the gathered global 2-D field
      LOGICAL useExch2GlobLayOut, zeroBuff
      INTEGER xSize, ySize
      INTEGER i, j, ij
      INTEGER bi, bj
      Real*8  sumAll
#ifdef ALLOW_USE_MPI
      INTEGER pId, idest, itag
      INTEGER istatus(MPI_STATUS_SIZE), ierr
#endif /* ALLOW_USE_MPI */
CEOP

C--   Select the size/layout of the gathered global field:
C     with EXCH2 the global map can be larger than Nx*Ny (facets with
C     unused corners), so the buffer must be zeroed before the gather.
#ifdef ALLOW_EXCH2
      zeroBuff = .TRUE.
      useExch2GlobLayOut = .TRUE.
      xSize = exch2_global_Nx
      ySize = exch2_global_Ny
#else /* ALLOW_EXCH2 */
      zeroBuff = .FALSE.
      useExch2GlobLayOut = .FALSE.
      xSize = Nx
      ySize = Ny
#endif /* ALLOW_EXCH2 */
#ifdef ALLOW_USE_MPI
C     idest: rank the non-zero processes receive from (process 0);
C     itag : common message tag for the result broadcast below.
      idest = 0
      itag  = 0
#endif

C--   copy (and conversion to real*8) to Shared buffer:
C     interior points only (1:sNx,1:sNy); overlap regions are excluded
C     from the sum.  Each thread copies its own tiles.
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          sharedLocBuf_r8(i,j,bi,bj) = phiLocal(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C--   Master thread does the communications and the global sum
C--   Master thread cannot start until everyone is ready:
      CALL BAR2( myThid )
      _BEGIN_MASTER( myThid )

C--   Gather local arrays: after this call, process 0 holds the fully
C     assembled xSize*ySize field in xy_buffer_r8.
      CALL GATHER_2D_R8(
     O                   xy_buffer_r8,
     I                   sharedLocBuf_r8,
     I                   xSize, ySize,
     I                   useExch2GlobLayOut, zeroBuff, myThid )

      IF ( myProcId.EQ.0 ) THEN
C--   Process 0 sums the global array
C     (fixed summation order => decomposition-independent result)
        sumAll = 0. _d 0
        DO ij=1,xSize*ySize
          sumAll = sumAll + xy_buffer_r8(ij)
        ENDDO

#ifdef ALLOW_USE_MPI
C--   Process 0 sends result to all other processes
C     (scalar broadcast done with point-to-point sends, one per rank)
        IF ( usingMPI ) THEN
         DO pId = 1, (nPx*nPy)-1
          CALL MPI_SEND (sumAll, 1, MPI_DOUBLE_PRECISION,
     &                   pId, itag, MPI_COMM_MODEL, ierr)
         ENDDO
        ENDIF
      ELSEIF ( usingMPI ) THEN
C--   All processes except 0 receive result from process 0
C     NOTE: this ELSEIF arm only exists when ALLOW_USE_MPI is defined;
C     without MPI the IF block above is the whole construct.
        CALL MPI_RECV (sumAll, 1, MPI_DOUBLE_PRECISION,
     &                 idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif /* ALLOW_USE_MPI */
      ENDIF

C--   Write solution to shared buffer (all threads can see it)
      phiGSR8(1,0) = sumAll
      _END_MASTER( myThid )
C--   Everyone wait for Master thread to be ready
      CALL BAR2( myThid )

C--   set result for all threads
      sumPhi = phiGSR8(1,0)

      RETURN
      END