C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_singlecpu.F,v 1.5 2012/09/03 19:36:29 jmc Exp $ C $Name: $ #include "PACKAGES_CONFIG.h" #include "CPP_EEOPTIONS.h" C-- File global_sum_singlecpu.F: Routines that perform global sum C on a single CPU C Contents C o GLOBAL_SUM_SINGLECPU_RL C o GLOBAL_SUM_SINGLECPU_RS <- not yet coded C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| CBOP C !ROUTINE: GLOBAL_SUM_SINGLECPU_RL C !INTERFACE: SUBROUTINE GLOBAL_SUM_SINGLECPU_RL( I phiLocal, O sumPhi, I oLi, oLj, myThid ) IMPLICIT NONE C !DESCRIPTION: C *==========================================================* C | SUBROUTINE GLOBAL\_SUM\_SINGLECPU\_RL C | o Handle sum for _RL data. C *==========================================================* C | Global sum of 2d array C | independent of tiling as sum is performed on a single CPU C | sum is performed in REAL*8 C *==========================================================* C !USES: C == Global data == #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" #include "GLOBAL_SUM.h" #ifdef ALLOW_EXCH2 #include "W2_EXCH2_SIZE.h" #include "W2_EXCH2_TOPOLOGY.h" #endif #include "EEBUFF_SCPU.h" C !INPUT/OUTPUT PARAMETERS: C == Routine arguments == C phiLocal :: local input array without overlap regions. C sumPhi :: Result of sum. C oLi, oLj :: overlap size of input array in I & J direction. C myThid :: My thread id. INTEGER oLi, oLj _RL phiLocal(1-oLi:sNx+oLi,1-oLj:sNy+oLj,nSx,nSy) _RL sumPhi INTEGER myThid C !LOCAL VARIABLES: C == Local variables == C- type declaration of: sumAll, globalBuf : C sumAll needs to have the same length as MPI_DOUBLE_PRECISION LOGICAL useExch2GlobLayOut, zeroBuff INTEGER xSize, ySize INTEGER i, j, ij INTEGER bi, bj Real*8 sumAll #ifdef ALLOW_USE_MPI INTEGER pId, idest, itag INTEGER istatus(MPI_STATUS_SIZE), ierr #endif /* ALLOW_USE_MPI */ CEOP #ifdef ALLOW_EXCH2 zeroBuff = .TRUE. useExch2GlobLayOut = .TRUE. xSize = exch2_global_Nx ySize = exch2_global_Ny #else /* ALLOW_EXCH2 */ zeroBuff = .FALSE. useExch2GlobLayOut = .FALSE. xSize = Nx ySize = Ny #endif /* ALLOW_EXCH2 */ #ifdef ALLOW_USE_MPI idest = 0 itag = 0 #endif C-- copy (and conversion to real*8) to Shared buffer: DO bj = myByLo(myThid), myByHi(myThid) DO bi = myBxLo(myThid), myBxHi(myThid) DO j=1,sNy DO i=1,sNx sharedLocBuf_r8(i,j,bi,bj) = phiLocal(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO C-- Master thread does the communications and the global sum C-- Master thread cannot start until everyone is ready: CALL BAR2( myThid ) _BEGIN_MASTER( myThid ) C-- Gather local arrays CALL GATHER_2D_R8( O xy_buffer_r8, I sharedLocBuf_r8, I xSize, ySize, I useExch2GlobLayOut, zeroBuff, myThid ) IF ( myProcId.EQ.0 ) THEN C-- Process 0 sums the global array sumAll = 0. _d 0 DO ij=1,xSize*ySize sumAll = sumAll + xy_buffer_r8(ij) ENDDO #ifdef ALLOW_USE_MPI C-- Process 0 sends result to all other processes IF ( usingMPI ) THEN DO pId = 1, (nPx*nPy)-1 CALL MPI_SEND (sumAll, 1, MPI_DOUBLE_PRECISION, & pId, itag, MPI_COMM_MODEL, ierr) ENDDO ENDIF ELSEIF ( usingMPI ) THEN C-- All proceses except 0 receive result from process 0 CALL MPI_RECV (sumAll, 1, MPI_DOUBLE_PRECISION, & idest, itag, MPI_COMM_MODEL, istatus, ierr) #endif /* ALLOW_USE_MPI */ ENDIF C-- Write solution to shared buffer (all threads can see it) phiGSR8(1,0) = sumAll _END_MASTER( myThid ) C-- Everyone wait for Master thread to be ready CALL BAR2( myThid ) C-- set result for every threads sumPhi = phiGSR8(1,0) RETURN END