C $Header: /u/gcmpack/MITgcm/model/src/cg3d.F,v 1.15 2005/02/04 19:30:33 jmc Exp $
C $Name:  $

#include "PACKAGES_CONFIG.h"
#include "CPP_OPTIONS.h"

#define VERBOSE

CBOP
C     !ROUTINE: CG3D
C     !INTERFACE:
      SUBROUTINE CG3D(  
     I                cg3d_b,
     U                cg3d_x,
     O                firstResidual,
     O                lastResidual,
     U                numIters,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG3D                                           
C     | o Three-dimensional grid problem conjugate-gradient       
C     |   inverter (with preconditioner).                         
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.   
C     | It requires the A be symmetric.                           
C     | This implementation assumes A is a seven-diagonal          
C     | matrix of the form that arises in the discrete            
C     | representation of the del^2 operator in a                 
C     | three-dimensional space.                                    
C     | Notes:                                                    
C     | ======                                                    
C     | This implementation can support shared-memory              
C     | multi-threaded execution. In order to do this COMMON       
C     | blocks are used for many of the arrays - even ones that    
C     | are only used for intermedaite results. This design is     
C     | OK if you want to all the threads to collaborate on        
C     | solving the same problem. On the other hand if you want    
C     | the threads to solve several different problems            
C     | concurrently this implementation will not work.           
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"
#include "CG3D.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     myThid    - Thread on which I am working.
C     cg2d_b    - The source term or "right hand side"
C     cg2d_x    - The solution
C     firstResidual - the initial residual before any iterations
C     lastResidual  - the actual residual reached
C     numIters  - Entry: the maximum number of iterations allowed
C                 Exit:  the actual number of iterations used
      _RL  cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
      _RL  cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
      _RL  firstResidual
      _RL  lastResidual
      INTEGER numIters
      INTEGER myThid


#ifdef ALLOW_NONHYDROSTATIC

C     !LOCAL VARIABLES:
C     === Local variables ====
C     actualIts      - Number of iterations taken
C     actualResidual - residual
C     bi          - Block index in X and Y.
C     bj
C     eta_qrN     - Used in computing search directions
C     eta_qrNM1     suffix N and NM1 denote current and
C     cgBeta        previous iterations respectively.
C     alpha  
C     sumRHS      - Sum of right-hand-side. Sometimes this is a
C                   useful debuggin/trouble shooting diagnostic.
C                   For neumann problems sumRHS needs to be ~0.
C                   or they converge at a non-zero residual.
C     err         - Measure of residual of Ax - b, usually the norm.
C     I, J, N     - Loop counters ( N counts CG iterations )
      INTEGER actualIts
      _RL    actualResidual
      INTEGER bi, bj              
      INTEGER I, J, K, it3d
      INTEGER KM1, KP1
      _RL    err, errTile
      _RL    eta_qrN, eta_qrNtile
      _RL    eta_qrNM1
      _RL    cgBeta
      _RL    alpha , alphaTile
      _RL    sumRHS, sumRHStile
      _RL    rhsMax
      _RL    rhsNorm

      INTEGER OLw
      INTEGER OLe
      INTEGER OLn
      INTEGER OLs
      INTEGER exchWidthX
      INTEGER exchWidthY
      INTEGER myNz
      _RL     topLevTerm
CEOP

ceh3 needs an IF ( useNONHYDROSTATIC ) THEN


C--   Initialise inverter
      eta_qrNM1 = 1. D0

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nr
         DO J=1,sNy
          DO I=1,sNx
           cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*cg3dNorm
           rhsMax = MAX(ABS(cg3d_b(I,J,K,bi,bj)),rhsMax)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO
      _GLOBAL_MAX_R8( rhsMax, myThid )
      rhsNorm = 1. _d 0
      IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nr
         DO J=1,sNy
          DO I=1,sNx
           cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*rhsNorm
           cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)*rhsNorm
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C--   Update overlaps
      _EXCH_XYZ_R8( cg3d_b, myThid )
      _EXCH_XYZ_R8( cg3d_x, myThid )

C--   Initial residual calculation (with free-Surface term)
      err    = 0. _d 0
      sumRHS = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile    = 0. _d 0
        sumRHStile = 0. _d 0
        DO K=1,Nr
         KM1 = K-1
         IF ( K .EQ. 1 ) KM1 = 1
         KP1 = K+1
         IF ( K .EQ. Nr ) KP1 = 1 
         topLevTerm = 0.
         IF ( K .EQ. 1) topLevTerm = freeSurfFac*cg3dNorm*
     &      (horiVertRatio/gravity)/deltaTMom/deltaTMom
         DO J=1,sNy
          DO I=1,sNx
           cg3d_s(I,J,K,bi,bj) = 0.
           cg3d_r(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj) -( 0.
     &     +aW3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I-1,J  ,K  ,bi,bj)
     &     +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_x(I+1,J  ,K  ,bi,bj)
     &     +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J-1,K  ,bi,bj)
     &     +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_x(I  ,J+1,K  ,bi,bj)
     &     +aV3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,KM1,bi,bj)
     &     +aV3d(I  ,J  ,KP1,bi,bj)*cg3d_x(I  ,J  ,KP1,bi,bj)
     &     -aW3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     -aW3d(I+1,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     -aV3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     -aV3d(I  ,J  ,KP1,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     -topLevTerm*_rA(I,J,bi,bj)*cg3d_x(I,J,K,bi,bj)
     &     )
           errTile = errTile
     &     +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           sumRHStile = sumRHStile
     &     +cg3d_b(I,J,K,bi,bj)
          ENDDO
         ENDDO
        ENDDO
        err    = err    + errTile
        sumRHS = sumRHS + sumRHStile
       ENDDO
      ENDDO
C     _EXCH_XYZ_R8( cg3d_r, myThid )
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
       exchWidthX = 1
       exchWidthY = 1
       myNz       = Nr
       CALL EXCH_RL( cg3d_r,
     I            OLw, OLe, OLs, OLn, myNz,
     I            exchWidthX, exchWidthY,
     I            FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
C     _EXCH_XYZ_R8( cg3d_s, myThid )
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
       exchWidthX = 1
       exchWidthY = 1
       myNz       = Nr
       CALL EXCH_RL( cg3d_s,
     I            OLw, OLe, OLs, OLn, myNz,
     I            exchWidthX, exchWidthY,
     I            FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
      _GLOBAL_SUM_R8( sumRHS, myThid )
      _GLOBAL_SUM_R8( err   , myThid )
      
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        write(*,'(A,1P2E22.14)')
     &     ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF

      actualIts      = 0
      actualResidual = SQRT(err)
C     _BARRIER
c     _BEGIN_MASTER( myThid )
c      WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
c    &  actualIts, actualResidual
c     _END_MASTER( myThid )
      firstResidual=actualResidual

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO 10 it3d=1, cg3dMaxIters

CcnhDebugStarts
#ifdef VERBOSE
c      IF ( mod(it3d-1,10).EQ.0)
c    &   WRITE(*,*) ' CG3D: Iteration ',it3d-1,
c    &      ' residual = ',actualResidual
#endif
CcnhDebugEnds
       IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
C      Note. On the next to loops over all tiles the inner loop ranges
C            in sNx and sNy are expanded by 1 to avoid a communication 
C            step. However this entails a bit of gynamastics because we only 
C            want eta_qrN for the interior points.
       eta_qrN = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         eta_qrNtile = 0. _d 0
         DO K=1,1
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_q(I,J,K,bi,bj) = 
     &       zMC(I  ,J  ,K,bi,bj)*cg3d_r(I  ,J  ,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         DO K=2,Nr
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_q(I,J,K,bi,bj) = 
     &       zMC(I,J,K,bi,bj)*(cg3d_r(I,J,K  ,bi,bj)
     &      -zML(I,J,K,bi,bj)*cg3d_q(I,J,K-1,bi,bj))
           ENDDO
          ENDDO
         ENDDO
         DO K=Nr,Nr
caja      IF (Nr .GT. 1) THEN
caja       DO J=1-1,sNy+1
caja        DO I=1-1,sNx+1
caja         cg3d_q(I,J,K,bi,bj) = 
caja &        zMC(i,j,k,bi,bj)*(cg3d_r(i,j,k  ,bi,bj)
caja &       -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj))
caja        ENDDO
caja       ENDDO
caja      ENDIF
          DO J=1,sNy
           DO I=1,sNx
            eta_qrNtile = eta_qrNtile
     &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         DO K=Nr-1,1,-1
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_q(I,J,K,bi,bj) = 
     &      cg3d_q(I,J,K,bi,bj)
     &      -zMU(I,J,K,bi,bj)*cg3d_q(I,J,K+1,bi,bj)
           ENDDO
          ENDDO
          DO J=1,sNy
           DO I=1,sNx
            eta_qrNtile = eta_qrNtile
     &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         eta_qrN = eta_qrN + eta_qrNtile
        ENDDO
       ENDDO
caja
caja  eta_qrN=0.
caja   DO bj=myByLo(myThid),myByHi(myThid)
caja    DO bi=myBxLo(myThid),myBxHi(myThid)
caja     DO K=1,Nr
caja      DO J=1,sNy
caja       DO I=1,sNx
caja        eta_qrN = eta_qrN
caja &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
caja       ENDDO
caja      ENDDO
caja     ENDDO
caja    ENDDO
caja   ENDDO
caja

       _GLOBAL_SUM_R8(eta_qrN, myThid)
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
CcnhDebugEnds
       cgBeta   = eta_qrN/eta_qrNM1
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
CcnhDebugEnds
       eta_qrNM1 = eta_qrN

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO K=1,Nr
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_s(I,J,K,bi,bj) = cg3d_q(I,J,K,bi,bj) 
     &                          + cgBeta*cg3d_s(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
       alpha = 0. _d 0
       topLevTerm = freeSurfFac*cg3dNorm*
     &      (horiVertRatio/gravity)/deltaTMom/deltaTMom
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         alphaTile = 0. _d 0
         IF ( Nr .GT. 1 ) THEN
          DO K=1,1
           DO J=1,sNy
            DO I=1,sNx
             cg3d_q(I,J,K,bi,bj) = 
     &       aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &      +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &      +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &      +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &      +aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K+1,bi,bj)
     &      -aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -topLevTerm*_rA(I,J,bi,bj)*cg3d_s(I,J,K,bi,bj)
             alphaTile = alphaTile
     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ELSE
          DO K=1,1
           DO J=1,sNy
            DO I=1,sNx
             cg3d_q(I,J,K,bi,bj) = 
     &       aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &      +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &      +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &      +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &      -aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -topLevTerm*_rA(I,J,bi,bj)*cg3d_s(I,J,K,bi,bj)
             alphaTile = alphaTile
     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDIF
         DO K=2,Nr-1
          DO J=1,sNy
           DO I=1,sNx
            cg3d_q(I,J,K,bi,bj) = 
     &      aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &     +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &     +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &     +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &     +aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K-1,bi,bj)
     &     +aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K+1,bi,bj)
     &     -aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &     -aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &     -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &     -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &     -aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &     -aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
            alphaTile = alphaTile
     &                +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         IF ( Nr .GT. 1 ) THEN
          DO K=Nr,Nr
           DO J=1,sNy
            DO I=1,sNx
             cg3d_q(I,J,K,bi,bj) = 
     &       aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &      +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &      +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &      +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &      +aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K-1,bi,bj)
     &      -aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
     &      -aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
             alphaTile = alphaTile
     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDIF
         alpha = alpha + alphaTile
        ENDDO
       ENDDO
       _GLOBAL_SUM_R8(alpha,myThid)
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
CcnhDebugEnds
       alpha = eta_qrN/alpha
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
CcnhDebugEnds
     
C==    Update solution and residual vectors
C      Now compute "interior" points.
       err = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile    = 0. _d 0
         DO K=1,Nr
          DO J=1,sNy
           DO I=1,sNx
            cg3d_x(I,J,K,bi,bj)=cg3d_x(I,J,K,bi,bj)
     &            +alpha*cg3d_s(I,J,K,bi,bj)
            cg3d_r(I,J,K,bi,bj)=cg3d_r(I,J,K,bi,bj)
     &            -alpha*cg3d_q(I,J,K,bi,bj)
           errTile = errTile
     &             +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         err = err + errTile
        ENDDO
       ENDDO

       _GLOBAL_SUM_R8( err   , myThid )
       err = SQRT(err)
       actualIts      = it3d
       actualResidual = err
       IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
C      _EXCH_XYZ_R8(cg3d_r, myThid )
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
       exchWidthX = 1
       exchWidthY = 1
       myNz       = Nr
       CALL EXCH_RL( cg3d_r,
     I             OLw, OLe, OLs, OLn, myNz,
     I             exchWidthX, exchWidthY,
     I             FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )

   10 CONTINUE
   11 CONTINUE

C--   Un-normalise the answer
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nr
         DO J=1,sNy
          DO I=1,sNx
           cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)/rhsNorm
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

Cadj  _EXCH_XYZ_R8(cg3d_x, myThid )
c     _BEGIN_MASTER( myThid )
c      WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
c    &  actualIts, actualResidual
c     _END_MASTER( myThid )
      lastResidual=actualResidual
      numIters=actualIts

#endif /* ALLOW_NONHYDROSTATIC */

      RETURN
      END