./eesupp/src/global_vec

C $Header: /u/gcmpack/MITgcm/eesupp/src/global_vec_sum.F,v 1.4 2006/08/12 03:10:26 edhill Exp $
C $Name:  $

C     Perform a global sum on an array of threaded vectors.
C     
C     Contents
C     o global_vec_sum_r4
C     o global_vec_sum_r8
C     o global_vec_sum_int

#include "CPP_EEOPTIONS.h"

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_R4

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_R4(
     I     ndim, nval,
     U     sumPhi,
     I     myThid )

C     !DESCRIPTION:
C     Sum the first nval entries of a per-tile vector over all tiles
C     (bi,bj) of this process and then, when MPI is in use, over all
C     MPI processes of MPI_COMM_MODEL.  Only the master thread does
C     the summation and the MPI call; it stores the result in
C     sumPhi(1:nval,1,1) and leaves the other entries unchanged.
C     Each thread is assumed to have already filled the entries of
C     sumPhi for its own tiles.  All threads synchronise on entry,
C     so that every tile entry is written before it is read, and
C     again on exit, so that the result is available on return.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     ndim     :: leading dimension of sumPhi
C     nval     :: number of vector elements to sum; must not
C                 exceed ndim since sumPhi(1:nval,:,:) is accessed
C     sumPhi   :: input: per-tile partial sums
C                 output: sumPhi(1:nval,1,1) holds the global sum
C     myThid   :: thread ID
      INTEGER ndim, nval, myThid
      Real*4 sumPhi(ndim,nSx,nSy)
CEOP

C     !LOCAL VARIABLES:
C     i, bi,bj :: loop indices (vector element and tile indices)
C     tmp1     :: sum over the tiles of this process
C     tmp2     :: sum over all processes (equals tmp1 without MPI)
C     mpiRC    :: MPI return code
      INTEGER i, bi,bj
      Real*4  tmp1(nval), tmp2(nval)
#ifdef   ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */

C     Wait until every thread has written its tile entries of the
C     shared array sumPhi before the master thread reads them
      _BARRIER

      _BEGIN_MASTER( myThid )

C     Sum over all tiles of this process
      DO i = 1,nval
        tmp1(i) = 0.
      ENDDO
      DO bj = 1,nSy
        DO bi = 1,nSx
          DO i = 1,nval
            tmp1(i) = tmp1(i) + sumPhi( i, bi,bj )
          ENDDO
        ENDDO
      ENDDO

C     Default result is the local sum; this covers the case where
C     MPI is not used
      DO i = 1,nval
        tmp2(i) = tmp1(i)
      ENDDO

C     Invoke MPI if necessary
#ifdef  ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_ALLREDUCE(tmp1,tmp2,nval,MPI_REAL,
     &       MPI_SUM,MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /*  ALLOW_USE_MPI */

C     Copy the results to the first location of the input array
      DO i = 1,nval
        sumPhi( i, 1,1 ) = tmp2(i)
      ENDDO

      _END_MASTER( myThid )

C     All threads wait here until the master has stored the result
      _BARRIER

      RETURN
      END

      
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_R8

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_R8(
     I     ndim, nval,
     U     sumPhi,
     I     myThid )

C     !DESCRIPTION:
C     Sum the first nval entries of a per-tile vector over all tiles
C     (bi,bj) of this process and then, when MPI is in use, over all
C     MPI processes of MPI_COMM_MODEL.  Only the master thread does
C     the summation and the MPI call; it stores the result in
C     sumPhi(1:nval,1,1) and leaves the other entries unchanged.
C     Each thread is assumed to have already filled the entries of
C     sumPhi for its own tiles.  All threads synchronise on entry,
C     so that every tile entry is written before it is read, and
C     again on exit, so that the result is available on return.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     ndim     :: leading dimension of sumPhi
C     nval     :: number of vector elements to sum; must not
C                 exceed ndim since sumPhi(1:nval,:,:) is accessed
C     sumPhi   :: input: per-tile partial sums
C                 output: sumPhi(1:nval,1,1) holds the global sum
C     myThid   :: thread ID
      INTEGER ndim, nval, myThid
      Real*8 sumPhi(ndim,nSx,nSy)
CEOP

C     !LOCAL VARIABLES:
C     i, bi,bj :: loop indices (vector element and tile indices)
C     tmp1     :: sum over the tiles of this process
C     tmp2     :: sum over all processes (equals tmp1 without MPI)
C     mpiRC    :: MPI return code
      INTEGER i, bi,bj
      Real*8  tmp1(nval), tmp2(nval)
#ifdef   ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */

C     Wait until every thread has written its tile entries of the
C     shared array sumPhi before the master thread reads them
      _BARRIER

      _BEGIN_MASTER( myThid )

C     Sum over all tiles of this process
      DO i = 1,nval
        tmp1(i) = 0. _d 0
      ENDDO
      DO bj = 1,nSy
        DO bi = 1,nSx
          DO i = 1,nval
            tmp1(i) = tmp1(i) + sumPhi( i, bi,bj )
          ENDDO
        ENDDO
      ENDDO

C     Default result is the local sum; this covers the case where
C     MPI is not used
      DO i = 1,nval
        tmp2(i) = tmp1(i)
      ENDDO

C     Invoke MPI if necessary
#ifdef  ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_ALLREDUCE(tmp1,tmp2,nval,MPI_DOUBLE_PRECISION,
     &       MPI_SUM,MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /*  ALLOW_USE_MPI */

C     Copy the results to the first location of the input array
      DO i = 1,nval
        sumPhi( i, 1,1 ) = tmp2(i)
      ENDDO

      _END_MASTER( myThid )

C     All threads wait here until the master has stored the result
      _BARRIER

      RETURN
      END

      
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_INT

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_INT(
     I     ndim, nval,
     U     sumPhi,
     I     myThid )

C     !DESCRIPTION:
C     Sum the first nval entries of a per-tile integer vector over
C     all tiles (bi,bj) of this process and then, when MPI is in
C     use, over all MPI processes of MPI_COMM_MODEL.  Only the
C     master thread does the summation and the MPI call; it stores
C     the result in sumPhi(1:nval,1,1) and leaves the other entries
C     unchanged.  Each thread is assumed to have already filled the
C     entries of sumPhi for its own tiles.  All threads synchronise
C     on entry, so that every tile entry is written before it is
C     read, and again on exit, so that the result is available on
C     return.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     ndim     :: leading dimension of sumPhi
C     nval     :: number of vector elements to sum; must not
C                 exceed ndim since sumPhi(1:nval,:,:) is accessed
C     sumPhi   :: input: per-tile partial sums
C                 output: sumPhi(1:nval,1,1) holds the global sum
C     myThid   :: thread ID
      INTEGER ndim, nval, myThid
      INTEGER sumPhi(ndim,nSx,nSy)
CEOP

C     !LOCAL VARIABLES:
C     i, bi,bj :: loop indices (vector element and tile indices)
C     tmp1     :: sum over the tiles of this process
C     tmp2     :: sum over all processes (equals tmp1 without MPI)
C     mpiRC    :: MPI return code
      INTEGER i, bi,bj
      INTEGER tmp1(nval), tmp2(nval)
#ifdef   ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */

C     Wait until every thread has written its tile entries of the
C     shared array sumPhi before the master thread reads them
      _BARRIER

      _BEGIN_MASTER( myThid )

C     Sum over all tiles of this process (integer zero, so that no
C     real-to-integer conversion is involved)
      DO i = 1,nval
        tmp1(i) = 0
      ENDDO
      DO bj = 1,nSy
        DO bi = 1,nSx
          DO i = 1,nval
            tmp1(i) = tmp1(i) + sumPhi( i, bi,bj )
          ENDDO
        ENDDO
      ENDDO

C     Default result is the local sum; this covers the case where
C     MPI is not used
      DO i = 1,nval
        tmp2(i) = tmp1(i)
      ENDDO

C     Invoke MPI if necessary
#ifdef  ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_ALLREDUCE(tmp1,tmp2,nval,MPI_INTEGER,
     &       MPI_SUM,MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /*  ALLOW_USE_MPI */

C     Copy the results to the first location of the input array
      DO i = 1,nval
        sumPhi( i, 1,1 ) = tmp2(i)
      ENDDO

      _END_MASTER( myThid )

C     All threads wait here until the master has stored the result
      _BARRIER

      RETURN
      END

      
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|