C $Header: /u/gcmpack/MITgcm/eesupp/src/global_vec_sum.F,v 1.3 2006/05/23 14:55:19 edhill Exp $
C $Name:  $

C     Perform a global sum on an array of threaded vectors.
C     
C     Contents
C     o global_sum_r4
C     o global_sum_r8

#include "CPP_EEOPTIONS.h"

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_R4

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_R4( 
     I     nval,
     U     sumPhi,
     I     myThid )

C     !DESCRIPTION:
C     Sum the vector over threads and then sum the result over all MPI
C     processes.  Within a process only one thread does the sum, each
C     thread is assumed to have already summed its local data.  The same
C     thread also does the inter-process sum for example with MPI and
C     then writes the result into a shared location. All threads wait
C     until the sum is available.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT PARAMETERS:
C     sumPhi - Result of sum.
C     myThid - My thread id.
      INTEGER nval, myThid
      Real*4 sumPhi(nval)
CEOP

C     !LOCAL VARIABLES:
C     I      - Loop counters
C     mpiRC  - MPI return code
      INTEGER I, j, it, ntot, nloop, ni
      Real*4  tmp1(MAX_VGS), tmp2(MAX_VGS)
#ifdef   ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */


      nloop = nval/MAX_VGS + 1
      ntot = 0
      DO it = 1,nloop

        ni = nval - ntot
        IF ( ni .GT. MAX_VGS )  ni = MAX_VGS
        IF ( ni .LT. 1 ) GOTO 999

C       write local sum into array
        CALL BAR2( myThid )
        DO j = 1,ni
          phivGSRS(j,myThid) = sumPhi(ntot + j)
        ENDDO
C       Can not start until everyone is ready
        CALL BAR2( myThid )

C       Sum within the process first
        _BEGIN_MASTER( myThid )

        DO j = 1,ni
          tmp1(j) = 0.
        ENDDO
        DO I = 1,nThreads
          DO j = 1,ni
            tmp1(j) = tmp1(j) + phiGSRS(j,I)
          ENDDO
        ENDDO

#ifdef  ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
        IF ( usingMPI ) THEN
#endif
          CALL MPI_ALLREDUCE(tmp1,tmp2,ni,MPI_REAL,MPI_SUM,
     &         MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
        ENDIF
#endif
#endif /*  ALLOW_USE_MPI */

C       Write solution to place where all threads can see it
        DO j = 1,ni
          phivGSRS(j,myThid) = tmp2(j)
        ENDDO

        _END_MASTER( myThid )
        
        CALL BAR2( myThid )        
C       set result for every process
        DO j = 1,ni
          sumPhi(ntot + j) = phivGSRS(j,1)
        ENDDO
        CALL BAR2( myThid )

        ntot = ntot + ni

      ENDDO

 999  CONTINUE
      RETURN
      END


C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| CBOP C !ROUTINE: GLOBAL_VEC_SUM_R8 C !INTERFACE: SUBROUTINE GLOBAL_VEC_SUM_R8( I nval, U sumPhi, I myThid ) C !DESCRIPTION: C Sum the vector over threads and then sum the result over all MPI C processes. Within a process only one thread does the sum, each C thread is assumed to have already summed its local data. The same C thread also does the inter-process sum for example with MPI and C then writes the result into a shared location. All threads wait C until the sum is avaiailable. C !USES: IMPLICIT NONE #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" #include "GLOBAL_SUM.h" C !INPUT PARAMETERS: C sumPhi - Result of sum. C myThid - My thread id. INTEGER nval, myThid Real*8 sumPhi(nval) CEOP C !LOCAL VARIABLES: C I - Loop counters C mpiRC - MPI return code INTEGER I, j, it, ntot, nloop, ni Real*8 tmp1(MAX_VGS), tmp2(MAX_VGS) #ifdef ALLOW_USE_MPI INTEGER mpiRC #endif /* ALLOW_USE_MPI */ nloop = nval/MAX_VGS + 1 ntot = 0 DO it = 1,nloop ni = nval - ntot IF ( ni .GT. MAX_VGS ) ni = MAX_VGS IF ( ni .LT. 1 ) GOTO 999 C write local sum into array CALL BAR2( myThid ) DO j = 1,ni phivGSRL(j,myThid) = sumPhi(ntot + j) ENDDO C Can not start until everyone is ready CALL BAR2( myThid ) C Sum within the process first _BEGIN_MASTER( myThid ) DO j = 1,ni tmp1(j) = 0. ENDDO DO I = 1,nThreads DO j = 1,ni tmp1(j) = tmp1(j) + phiGSRS(j,I) ENDDO ENDDO #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif CALL MPI_ALLREDUCE(tmp1,tmp2,ni,MPI_DOUBLE_PRECISION, & MPI_SUM,MPI_COMM_MODEL,mpiRC) #ifndef ALWAYS_USE_MPI ENDIF #endif #endif /* ALLOW_USE_MPI */ C Write solution to place where all threads can see it DO j = 1,ni phivGSRL(j,myThid) = tmp2(j) ENDDO _END_MASTER( myThid ) CALL BAR2( myThid ) C set result for every process DO j = 1,ni sumPhi(ntot + j) = phivGSRL(j,1) ENDDO CALL BAR2( myThid ) ntot = ntot + ni ENDDO 999 CONTINUE RETURN END


C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| CBOP C !ROUTINE: GLOBAL_VEC_SUM_INT C !INTERFACE: SUBROUTINE GLOBAL_VEC_SUM_INT( I nval, U sumPhi, I myThid ) C !DESCRIPTION: C Sum the vector over threads and then sum the result over all MPI C processes. Within a process only one thread does the sum, each C thread is assumed to have already summed its local data. The same C thread also does the inter-process sum for example with MPI and C then writes the result into a shared location. All threads wait C until the sum is avaiailable. C !USES: IMPLICIT NONE #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" #include "GLOBAL_SUM.h" C !INPUT PARAMETERS: C sumPhi - Result of sum. C myThid - My thread id. INTEGER nval, myThid INTEGER sumPhi(nval) CEOP C !LOCAL VARIABLES: C I - Loop counters C mpiRC - MPI return code INTEGER I, j, it, ntot, nloop, ni INTEGER tmp1(MAX_VGS), tmp2(MAX_VGS) #ifdef ALLOW_USE_MPI INTEGER mpiRC #endif /* ALLOW_USE_MPI */ nloop = nval/MAX_VGS + 1 ntot = 0 DO it = 1,nloop ni = nval - ntot IF ( ni .GT. MAX_VGS ) ni = MAX_VGS IF ( ni .LT. 1 ) GOTO 999 C write local sum into array CALL BAR2( myThid ) DO j = 1,ni phivGSI(j,myThid) = sumPhi(ntot + j) ENDDO C Can not start until everyone is ready CALL BAR2( myThid ) C Sum within the process first _BEGIN_MASTER( myThid ) DO j = 1,ni tmp1(j) = 0. ENDDO DO I = 1,nThreads DO j = 1,ni tmp1(j) = tmp1(j) + phiGSRS(j,I) ENDDO ENDDO #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif CALL MPI_ALLREDUCE(tmp1,tmp2,ni,MPI_INTEGER,MPI_SUM, & MPI_COMM_MODEL,mpiRC) #ifndef ALWAYS_USE_MPI ENDIF #endif #endif /* ALLOW_USE_MPI */ C Write solution to place where all threads can see it DO j = 1,ni phivGSI(j,myThid) = tmp2(j) ENDDO _END_MASTER( myThid ) CALL BAR2( myThid ) C set result for every process DO j = 1,ni sumPhi(ntot + j) = phivGSI(j,1) ENDDO CALL BAR2( myThid ) ntot = ntot + ni ENDDO 999 CONTINUE RETURN END


C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|