C $Header: /u/gcmpack/MITgcm/eesupp/src/global_vec_sum.F,v 1.4 2006/08/12 03:10:26 edhill Exp $
C $Name: $

C     Perform a global sum on an array of threaded vectors.
C
C     Contents
C     o global_vec_sum_r4
C     o global_vec_sum_r8
C     o global_vec_sum_int

#include "CPP_EEOPTIONS.h"

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_R4

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_R4(
     I                       ndim, nval,
     U                       sumPhi,
     I                       myThid )

C     !DESCRIPTION:
C     Sum the vector over threads and then sum the result over all MPI
C     processes.  Within a process only one thread does the sum; each
C     thread is assumed to have already summed its local data.  The
C     same thread also does the inter-process sum, for example with
C     MPI, and then writes the result into a shared location.  All
C     threads wait until the sum is available.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     ndim   :: first dimension of sumPhi (>= nval)
C     nval   :: number of vector elements to sum
C     sumPhi :: input/output array
C     myThid :: thread ID
      INTEGER ndim, nval, myThid
      Real*4 sumPhi(ndim,nSx,nSy)
CEOP

C     !LOCAL VARIABLES:
C     mpiRC :: MPI return code
      INTEGER i, bi,bj
      Real*4 tmp1(nval), tmp2(nval)
#ifdef ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */

C     Empty the temp arrays
      DO i = 1,nval
        tmp1(i) = 0. _d 0
        tmp2(i) = 0. _d 0
      ENDDO

      _BEGIN_MASTER( myThid )

C     Sum over all threads
      DO bj = 1,nSy
       DO bi = 1,nSx
        DO i = 1,nval
         tmp1(i) = tmp1(i) + sumPhi( i, bi,bj )
        ENDDO
       ENDDO
      ENDDO

C     Copy the first temp array to the second temp array to handle
C     the case where MPI is not used
      DO i = 1,nval
        tmp2(i) = tmp1(i)
      ENDDO

C     Invoke MPI if necessary
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_ALLREDUCE(tmp1,tmp2,nval,MPI_REAL,
     &                     MPI_SUM,MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

C     Copy the results to the first location of the input array
      DO i = 1,nval
        sumPhi( i, 1,1 ) = tmp2(i)
      ENDDO

      _END_MASTER( myThid )
      _BARRIER

      RETURN
      END
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_R8

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_R8(
     I                       ndim, nval,
     U                       sumPhi,
     I                       myThid )

C     !DESCRIPTION:
C     Sum the vector over threads and then sum the result over all MPI
C     processes.  Within a process only one thread does the sum; each
C     thread is assumed to have already summed its local data.  The
C     same thread also does the inter-process sum, for example with
C     MPI, and then writes the result into a shared location.  All
C     threads wait until the sum is available.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     ndim   :: first dimension of sumPhi (>= nval)
C     nval   :: number of vector elements to sum
C     sumPhi :: input/output array
C     myThid :: thread ID
      INTEGER ndim, nval, myThid
      Real*8 sumPhi(ndim,nSx,nSy)
CEOP

C     !LOCAL VARIABLES:
C     mpiRC :: MPI return code
      INTEGER i, bi,bj
      Real*8 tmp1(nval), tmp2(nval)
#ifdef ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */

C     Empty the temp arrays
      DO i = 1,nval
        tmp1(i) = 0. _d 0
        tmp2(i) = 0. _d 0
      ENDDO

      _BEGIN_MASTER( myThid )

C     Sum over all threads
      DO bj = 1,nSy
       DO bi = 1,nSx
        DO i = 1,nval
         tmp1(i) = tmp1(i) + sumPhi( i, bi,bj )
        ENDDO
       ENDDO
      ENDDO

C     Copy the first temp array to the second temp array to handle
C     the case where MPI is not used
      DO i = 1,nval
        tmp2(i) = tmp1(i)
      ENDDO

C     Invoke MPI if necessary
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_ALLREDUCE(tmp1,tmp2,nval,MPI_DOUBLE_PRECISION,
     &                     MPI_SUM,MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

C     Copy the results to the first location of the input array
      DO i = 1,nval
        sumPhi( i, 1,1 ) = tmp2(i)
      ENDDO

      _END_MASTER( myThid )
      _BARRIER

      RETURN
      END
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_VEC_SUM_INT

C     !INTERFACE:
      SUBROUTINE GLOBAL_VEC_SUM_INT(
     I                       ndim, nval,
     U                       sumPhi,
     I                       myThid )

C     !DESCRIPTION:
C     Sum the vector over threads and then sum the result over all MPI
C     processes.  Within a process only one thread does the sum; each
C     thread is assumed to have already summed its local data.  The
C     same thread also does the inter-process sum, for example with
C     MPI, and then writes the result into a shared location.  All
C     threads wait until the sum is available.

C     !USES:
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     ndim   :: first dimension of sumPhi (>= nval)
C     nval   :: number of vector elements to sum
C     sumPhi :: input/output array
C     myThid :: thread ID
      INTEGER ndim, nval, myThid
      INTEGER sumPhi(ndim,nSx,nSy)
CEOP

C     !LOCAL VARIABLES:
C     mpiRC :: MPI return code
      INTEGER i, bi,bj
      INTEGER tmp1(nval), tmp2(nval)
#ifdef ALLOW_USE_MPI
      INTEGER mpiRC
#endif /* ALLOW_USE_MPI */

C     Empty the temp arrays (integer zero for the INTEGER variant)
      DO i = 1,nval
        tmp1(i) = 0
        tmp2(i) = 0
      ENDDO

      _BEGIN_MASTER( myThid )

C     Sum over all threads
      DO bj = 1,nSy
       DO bi = 1,nSx
        DO i = 1,nval
         tmp1(i) = tmp1(i) + sumPhi( i, bi,bj )
        ENDDO
       ENDDO
      ENDDO

C     Copy the first temp array to the second temp array to handle
C     the case where MPI is not used
      DO i = 1,nval
        tmp2(i) = tmp1(i)
      ENDDO

C     Invoke MPI if necessary
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_ALLREDUCE(tmp1,tmp2,nval,MPI_INTEGER,
     &                     MPI_SUM,MPI_COMM_MODEL,mpiRC)
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

C     Copy the results to the first location of the input array
      DO i = 1,nval
        sumPhi( i, 1,1 ) = tmp2(i)
      ENDDO

      _END_MASTER( myThid )
      _BARRIER

      RETURN
      END
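C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
C     A minimal, hypothetical usage sketch: one plausible way a caller
C     might drive GLOBAL_VEC_SUM_R8 (the R4 and INT variants are
C     called the same way).  The example routine name, the argument
C     tileVal and the COMMON buffer are illustrative names only.
C     Because the master thread must see every tile's contribution,
C     the buffer passed as sumPhi lives in a COMMON block rather than
C     on a thread-private stack.

      SUBROUTINE GLOBAL_VEC_SUM_R8_EXAMPLE(
     I                       tileVal,
     O                       globalSum,
     I                       myThid )
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"

C     tileVal   :: one partial sum per tile, already accumulated by
C                  the thread that owns that tile
C     globalSum :: result: the sum over all tiles and all processes
C     myThid    :: thread ID
      Real*8 tileVal(nSx,nSy)
      Real*8 globalSum
      INTEGER myThid

C     Shared work buffer (nval = 1), visible to the master thread
      Real*8 shareBuf(1,nSx,nSy)
      COMMON / GLOVEC_EXAMPLE_BUF / shareBuf
      INTEGER bi, bj

C     Each thread copies the tiles it owns into the shared buffer
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
        shareBuf(1,bi,bj) = tileVal(bi,bj)
       ENDDO
      ENDDO
      _BARRIER

C     All threads call; inside, the master thread sums over tiles and
C     (when MPI is used) over processes, then every thread reads the
C     result from element (1,1,1)
      CALL GLOBAL_VEC_SUM_R8( 1, 1, shareBuf, myThid )
      globalSum = shareBuf(1,1,1)

      RETURN
      END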
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|