C $Header: /u/gcmpack/MITgcm/model/src/cg3d.F,v 1.23 2010/01/17 21:55:48 jmc Exp $
C $Name: $
#include "CPP_OPTIONS.h"
#ifdef TARGET_NEC_SX
C set a sensible default for the outer loop unrolling parameter that can
C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
#ifndef CG3D_OUTERLOOPITERS
# define CG3D_OUTERLOOPITERS 10
#endif
#endif /* TARGET_NEC_SX */
CBOP
C !ROUTINE: CG3D
C !INTERFACE:
SUBROUTINE CG3D(
I cg3d_b,
U cg3d_x,
O firstResidual,
O lastResidual,
U numIters,
I myIter, myThid )
C !DESCRIPTION: \bv
C *==========================================================*
C | SUBROUTINE CG3D
C | o Three-dimensional grid problem conjugate-gradient
C | inverter (with preconditioner).
C *==========================================================*
C | Con. grad is an iterative procedure for solving Ax = b.
C | It requires the A be symmetric.
C | This implementation assumes A is a seven-diagonal
C | matrix of the form that arises in the discrete
C | representation of the del^2 operator in a
C | three-dimensional space.
C | Notes:
C | ======
C | This implementation can support shared-memory
C | multi-threaded execution. In order to do this COMMON
C | blocks are used for many of the arrays - even ones that
C | are only used for intermedaite results. This design is
C | OK if you want to all the threads to collaborate on
C | solving the same problem. On the other hand if you want
C | the threads to solve several different problems
C | concurrently this implementation will not work.
C *==========================================================*
C \ev
C !USES:
IMPLICIT NONE
C === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"
#include "SURFACE.h"
#include "CG3D.h"
C !INPUT/OUTPUT PARAMETERS:
C === Routine arguments ===
C cg3d_b :: The source term or "right hand side"
C cg3d_x :: The solution
C firstResidual :: the initial residual before any iterations
C lastResidual :: the actual residual reached
C numIters :: Entry: the maximum number of iterations allowed
C :: Exit: the actual number of iterations used
C myIter :: Current iteration number in simulation
C myThid :: my Thread Id number
_RL cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
_RL cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
_RL firstResidual
_RL lastResidual
INTEGER numIters
INTEGER myIter
INTEGER myThid
#ifdef ALLOW_NONHYDROSTATIC
C !LOCAL VARIABLES:
C === Local variables ====
C actualIts :: Number of iterations taken
C actualResidual :: residual
C bi,bj :: tile indices
C eta_qrN :: Used in computing search directions
C eta_qrNM1 suffix N and NM1 denote current and
C cgBeta previous iterations respectively.
C alpha
C sumRHS :: Sum of right-hand-side. Sometimes this is a
C useful debuggin/trouble shooting diagnostic.
C For neumann problems sumRHS needs to be ~0.
C or they converge at a non-zero residual.
C err :: Measure of residual of Ax - b, usually the norm.
C i, j, k :: Loop counters
C it3d :: Loop counter for CG iterations
C msgBuf :: Informational/error message buffer
INTEGER actualIts
_RL actualResidual
INTEGER bi, bj
INTEGER i, j, k, it3d
INTEGER km1, kp1
_RL maskM1, maskP1
_RL err, errTile(nSx,nSy)
_RL eta_qrN,eta_qrNtile(nSx,nSy)
_RL eta_qrNM1
_RL cgBeta
_RL alpha , alphaTile(nSx,nSy)
_RL sumRHS, sumRHStile(nSx,nSy)
_RL rhsMax
_RL rhsNorm
CHARACTER*(MAX_LEN_MBUF) msgBuf
_RL surfFac
#ifdef NONLIN_FRSURF
INTEGER ks
_RL surfTerm(sNx,sNy)
#endif /* NONLIN_FRSURF */
CEOP
IF ( select_rStar .NE. 0 ) THEN
surfFac = freeSurfFac
ELSE
surfFac = 0.
ENDIF
#ifdef NONLIN_FRSURF
DO j=1,sNy
DO i=1,sNx
surfTerm(i,j) = 0.
ENDDO
ENDDO
#endif /* NONLIN_FRSURF */
C-- Initialise inverter
eta_qrNM1 = 1. _d 0
C-- Normalise RHS
rhsMax = 0. _d 0
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
DO k=1,Nr
DO j=1,sNy
DO i=1,sNx
cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*cg3dNorm
& * maskC(i,j,k,bi,bj)
rhsMax = MAX(ABS(cg3d_b(i,j,k,bi,bj)),rhsMax)
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
_GLOBAL_MAX_RL( rhsMax, myThid )
rhsNorm = 1. _d 0
IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
DO k=1,Nr
DO j=1,sNy
DO i=1,sNx
cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*rhsNorm
cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)*rhsNorm
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
C-- Update overlaps
c _EXCH_XYZ_RL( cg3d_b, myThid )
_EXCH_XYZ_RL( cg3d_x, myThid )
C-- Initial residual calculation (with free-Surface term)
err = 0. _d 0
sumRHS = 0. _d 0
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
errTile(bi,bj) = 0. _d 0
sumRHStile(bi,bj) = 0. _d 0
#ifdef NONLIN_FRSURF
IF ( select_rStar .NE. 0 ) THEN
DO j=1,sNy
DO i=1,sNx
surfTerm(i,j) = 0.
ENDDO
ENDDO
DO k=1,Nr
DO j=1,sNy
DO i=1,sNx
surfTerm(i,j) = surfTerm(i,j)
& +cg3d_x(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
DO j=1,sNy
DO i=1,sNx
ks = ksurfC(i,j,bi,bj)
surfTerm(i,j) = surfTerm(i,j)*cg3dNorm
& *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj)
& *rA(i,j,bi,bj)*deepFac2F(ks)
& *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTfreesurf
ENDDO
ENDDO
ENDIF
#endif /* NONLIN_FRSURF */
DO k=1,Nr
km1 = MAX(k-1, 1 )
kp1 = MIN(k+1, Nr)
maskM1 = 1. _d 0
maskP1 = 1. _d 0
IF ( k .EQ. 1 ) maskM1 = 0. _d 0
IF ( k .EQ. Nr) maskP1 = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
cg3d_r(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)
& -( 0.
& +aW3d( i, j, k, bi,bj)*cg3d_x(i-1,j, k, bi,bj)
& +aW3d(i+1,j, k, bi,bj)*cg3d_x(i+1,j, k, bi,bj)
& +aS3d( i, j, k, bi,bj)*cg3d_x( i,j-1,k, bi,bj)
& +aS3d( i,j+1,k, bi,bj)*cg3d_x( i,j+1,k, bi,bj)
& +aV3d( i, j, k, bi,bj)*cg3d_x( i, j,km1,bi,bj)*maskM1
& +aV3d( i, j,kp1,bi,bj)*cg3d_x( i, j,kp1,bi,bj)*maskP1
& +aC3d( i, j, k, bi,bj)*cg3d_x( i, j, k, bi,bj)
#ifdef NONLIN_FRSURF
& -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
#endif /* NONLIN_FRSURF */
& )
errTile(bi,bj) = errTile(bi,bj)
& +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
sumRHStile(bi,bj) = sumRHStile(bi,bj)+cg3d_b(i,j,k,bi,bj)
ENDDO
ENDDO
DO J=1-1,sNy+1
DO I=1-1,sNx+1
cg3d_s(i,j,k,bi,bj) = 0.
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
c CALL EXCH_S3D_RL( cg3d_s, Nr, myThid )
CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
IF ( debugLevel.GT.debLevB .AND. diagFreq.GT.0. ) THEN
CALL WRITE_FLD_S3D_RL(
I 'cg3d_r_I', 'I10', 1, Nr, cg3d_r, myIter, myThid )
ENDIF
IF ( debugLevel .GE. debLevZero ) THEN
_BEGIN_MASTER( myThid )
WRITE(standardmessageunit,'(A,1P2E22.14)')
& ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
_END_MASTER( myThid )
ENDIF
actualIts = 0
actualResidual = SQRT(err)
firstResidual=actualResidual
C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
DO 10 it3d=1, numIters
CcnhDebugStarts
c IF ( mod(it3d-1,10).EQ.0)
c & WRITE(*,*) ' CG3D: Iteration ',it3d-1,
c & ' residual = ',actualResidual
CcnhDebugEnds
IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
C-- Solve preconditioning equation and update
C-- conjugate direction vector "s".
C Note. On the next two loops over all tiles the inner loop ranges
C in sNx and sNy are expanded by 1 to avoid a communication
C step. However this entails a bit of gynamastics because we only
C want eta_qrN for the interior points.
eta_qrN = 0. _d 0
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
eta_qrNtile(bi,bj) = 0. _d 0
DO k=1,1
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1-1,sNy+1
DO i=1-1,sNx+1
cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj)
& *cg3d_r(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
DO k=2,Nr
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1-1,sNy+1
DO i=1-1,sNx+1
cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj)
& *( cg3d_r(i,j,k,bi,bj)
& -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj)
& )
ENDDO
ENDDO
ENDDO
DO k=Nr,Nr
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
& +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
DO k=Nr-1,1,-1
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1-1,sNy+1
DO i=1-1,sNx+1
cg3d_q(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj)
& -zMU(i,j,k,bi,bj)*cg3d_q(i,j,k+1,bi,bj)
ENDDO
ENDDO
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
& +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
CcnhDebugStarts
C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
CcnhDebugEnds
cgBeta = eta_qrN/eta_qrNM1
CcnhDebugStarts
C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
CcnhDebugEnds
eta_qrNM1 = eta_qrN
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
DO k=1,Nr
DO j=1-1,sNy+1
DO i=1-1,sNx+1
cg3d_s(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj)
& + cgBeta*cg3d_s(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
C== Evaluate laplace operator on conjugate gradient vector
C== q = A.s
alpha = 0. _d 0
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
alphaTile(bi,bj) = 0. _d 0
#ifdef NONLIN_FRSURF
IF ( select_rStar .NE. 0 ) THEN
DO j=1,sNy
DO i=1,sNx
surfTerm(i,j) = 0.
ENDDO
ENDDO
DO k=1,Nr
DO j=1,sNy
DO i=1,sNx
surfTerm(i,j) = surfTerm(i,j)
& +cg3d_s(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
DO j=1,sNy
DO i=1,sNx
ks = ksurfC(i,j,bi,bj)
surfTerm(i,j) = surfTerm(i,j)*cg3dNorm
& *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj)
& *rA(i,j,bi,bj)*deepFac2F(ks)
& *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTfreesurf
ENDDO
ENDDO
ENDIF
#endif /* NONLIN_FRSURF */
IF ( Nr .GT. 1 ) THEN
k=1
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
cg3d_q(i,j,k,bi,bj) =
& aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
& +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
& +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
& +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
& +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj)
& +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
#ifdef NONLIN_FRSURF
& -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
#endif /* NONLIN_FRSURF */
alphaTile(bi,bj) = alphaTile(bi,bj)
& +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
ENDDO
ENDDO
ELSE
k=1
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
cg3d_q(i,j,k,bi,bj) =
& aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
& +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
& +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
& +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
& +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
#ifdef NONLIN_FRSURF
& -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
#endif /* NONLIN_FRSURF */
alphaTile(bi,bj) = alphaTile(bi,bj)
& +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
ENDDO
ENDDO
ENDIF
DO k=2,Nr-1
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
cg3d_q(i,j,k,bi,bj) =
& aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
& +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
& +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
& +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
& +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj)
& +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj)
& +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
#ifdef NONLIN_FRSURF
& -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
#endif /* NONLIN_FRSURF */
alphaTile(bi,bj) = alphaTile(bi,bj)
& +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
IF ( Nr .GT. 1 ) THEN
k=Nr
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
cg3d_q(i,j,k,bi,bj) =
& aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj)
& +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj)
& +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj)
& +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj)
& +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj)
& +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj)
#ifdef NONLIN_FRSURF
& -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj)
#endif /* NONLIN_FRSURF */
alphaTile(bi,bj) = alphaTile(bi,bj)
& +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj)
ENDDO
ENDDO
ENDIF
ENDDO
ENDDO
CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
CcnhDebugStarts
C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
CcnhDebugEnds
alpha = eta_qrN/alpha
CcnhDebugStarts
C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
CcnhDebugEnds
C== Update solution and residual vectors
C Now compute "interior" points.
err = 0. _d 0
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
errTile(bi,bj) = 0. _d 0
DO k=1,Nr
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
DO j=1,sNy
DO i=1,sNx
cg3d_x(i,j,k,bi,bj)=cg3d_x(i,j,k,bi,bj)
& +alpha*cg3d_s(i,j,k,bi,bj)
cg3d_r(i,j,k,bi,bj)=cg3d_r(i,j,k,bi,bj)
& -alpha*cg3d_q(i,j,k,bi,bj)
errTile(bi,bj) = errTile(bi,bj)
& +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj)
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
err = SQRT(err)
actualIts = it3d
actualResidual = err
IF ( debugLevel.GT.debLevB ) THEN
c IF ( DIFFERENT_MULTIPLE(monitorFreq,myTime,deltaTClock)
c & ) THEN
_BEGIN_MASTER( myThid )
WRITE(msgBuf,'(A,I6,A,1PE21.14)')
& ' cg3d: iter=', actualIts, ' ; resid.= ', actualResidual
CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
& SQUEEZE_RIGHT, myThid )
_END_MASTER( myThid )
c ENDIF
ENDIF
IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
10 CONTINUE
11 CONTINUE
IF ( debugLevel.GT.debLevB .AND. diagFreq.GT.0. ) THEN
CALL WRITE_FLD_S3D_RL(
I 'cg3d_r_F', 'I10', 1, Nr, cg3d_r, myIter, myThid )
ENDIF
C-- Un-normalise the answer
DO bj=myByLo(myThid),myByHi(myThid)
DO bi=myBxLo(myThid),myBxHi(myThid)
DO k=1,Nr
DO j=1,sNy
DO i=1,sNx
cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)/rhsNorm
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
lastResidual = actualResidual
numIters = actualIts
#endif /* ALLOW_NONHYDROSTATIC */
RETURN
END