C $Header: /u/gcmpack/MITgcm/model/src/cg2d_nsa.F,v 1.1 2006/06/07 01:45:42 heimbach Exp $
C $Name:  $

#include "CPP_OPTIONS.h"
#ifdef ALLOW_USE_MPI
C HACK to avoid global_max
# define ALLOW_CONST_RHSMAX
#endif 

CML THIS DOES NOT WORK +++++
#undef ALLOW_LOOP_DIRECTIVE
CBOP
C     !ROUTINE: CG2D_NSA
C     !INTERFACE:
      SUBROUTINE CG2D_NSA(  
     I                cg2d_b,
     U                cg2d_x,
     O                firstResidual,
     O                lastResidual,
     U                numIters,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D_NSA                                           
C     | o Two-dimensional grid problem conjugate-gradient         
C     |   inverter (with preconditioner).                         
C     | o This version is used only in the case when the matrix
C     |   operator is not "self-adjoint" (NSA). Any remaining 
C     |   residuals will immediately reported to the department
C     |   of homeland security.
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.   
C     | It requires the A be symmetric.                           
C     | This implementation assumes A is a five-diagonal          
C     | matrix of the form that arises in the discrete            
C     | representation of the del^2 operator in a                 
C     | two-dimensional space.                                    
C     | Notes:                                                    
C     | ======                                                    
C     | This implementation can support shared-memory              
C     | multi-threaded execution. In order to do this COMMON       
C     | blocks are used for many of the arrays - even ones that    
C     | are only used for intermedaite results. This design is     
C     | OK if you want to all the threads to collaborate on        
C     | solving the same problem. On the other hand if you want    
C     | the threads to solve several different problems            
C     | concurrently this implementation will not work.           
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"
#include "CG2D.h"
#include "SURFACE.h"
#ifdef ALLOW_AUTODIFF_TAMC
# include "tamc.h"
# include "tamc_keys.h"
#endif

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     myThid    - Thread on which I am working.
C     cg2d_b    - The source term or "right hand side"
C     cg2d_x    - The solution
C     firstResidual - the initial residual before any iterations
C     lastResidual  - the actual residual reached
C     numIters  - Entry: the maximum number of iterations allowed
C                 Exit:  the actual number of iterations used
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  lastResidual
      INTEGER numIters
      INTEGER myThid

#ifdef ALLOW_CG2D_NSA
C     !LOCAL VARIABLES:
C     === Local variables ====
C     actualIts      - Number of iterations taken
C     actualResidual - residual
C     bi          - Block index in X and Y.
C     bj
C     eta_qrN     - Used in computing search directions
C     eta_qrNM1     suffix N and NM1 denote current and
C     cgBeta        previous iterations respectively.
C     recip_eta_qrNM1 reciprocal of eta_qrNM1
C     alpha  
C     alpha_aux   - to avoid the statement: alpha = 1./alpha (for TAMC/TAF)
C     sumRHS      - Sum of right-hand-side. Sometimes this is a
C                   useful debuggin/trouble shooting diagnostic.
C                   For neumann problems sumRHS needs to be ~0.
C                   or they converge at a non-zero residual.
C     err         - Measure of residual of Ax - b, usually the norm.
C     err_sq      - square of err (for TAMC/TAF)
C     I, J, N     - Loop counters ( N counts CG iterations )
      INTEGER actualIts
      _RL    actualResidual
      INTEGER bi, bj              
      INTEGER I, J, it2d

      _RL    err
      _RL    err_sq
      _RL    eta_qrN
      _RL    eta_qrNM1
      _RL    recip_eta_qrNM1
      _RL    cgBeta
      _RL    alpha
      _RL    alpha_aux
      _RL    sumRHS
      _RL    rhsMax, rhsMaxGlobal
      _RL    rhsNorm
      _RL    cg2dTolerance_sq
CEOP

#ifdef ALLOW_AUTODIFF_TAMC
      IF ( numIters .GT. numItersMax ) THEN
         WRITE(standardMessageUnit,'(A,I10)') 
     &        'CG2D_NSA: numIters > numItersMax = ', numItersMax
         STOP 'NON-NORMAL in CG2D_NSA'
      ENDIF
#endif /* ALLOW_AUTODIFF_TAMC */

CcnhDebugStarts
C     CHARACTER*(MAX_LEN_FNAM) suff
CcnhDebugEnds

#ifdef ALLOW_AUTODIFF_TAMC
          act1 = myThid - 1
          max1 = nTx*nTy
          act2 = ikey_dynamics - 1
          ikey = (act1 + 1) + act2*max1
#endif /* ALLOW_AUTODIFF_TAMC */

C--   Initialise inverter
      eta_qrNM1 = 1. _d 0
      recip_eta_qrNM1 = 1./eta_qrNM1

CcnhDebugStarts
C     _EXCH_XY_R8( cg2d_b, myThid )
C     CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid )
C     suff = 'unnormalised'
C     CALL WRITE_FLD_XY_RL (  'cg2d_b.',suff,    cg2d_b, 1, myThid)
C     STOP
CcnhDebugEnds

C--   Normalise RHS
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte 
#endif /* ALLOW_AUTODIFF_TAMC */
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      IF (cg2dNormaliseRHS) THEN
C     -  Normalise RHS :
#ifdef LETS_MAKE_JAM
C     _GLOBAL_MAX_R8( rhsMax, myThid )
      rhsMaxGlobal=1.
#else
#ifdef ALLOW_CONST_RHSMAX
      rhsMaxGlobal=1.
#else
      rhsMaxGlobal=rhsMax
      _GLOBAL_MAX_R8( rhsMaxGlobal, myThid )
#endif /* ALLOW_CONST_RHSMAX */
#endif
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte 
#endif /* ALLOW_AUTODIFF_TAMC */
      IF ( rhsMaxGlobal .NE. 0. ) THEN
       rhsNorm = 1. _d 0 / rhsMaxGlobal
      ELSE
       rhsNorm = 1. _d 0
      ENDIF

#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte 
CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte 
#endif /* ALLOW_AUTODIFF_TAMC */
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
          cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO
C- end Normalise RHS
      ENDIF

C--   Update overlaps
      _EXCH_XY_R8( cg2d_b, myThid )
      _EXCH_XY_R8( cg2d_x, myThid )
CcnhDebugStarts
C     CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid )
C     suff = 'normalised'
C     CALL WRITE_FLD_XY_RL (  'cg2d_b.',suff,    cg2d_b, 1, myThid)
CcnhDebugEnds

C--   Initial residual calculation
      err = 0. _d 0
      err_sq = 0. _d 0
      sumRHS = 0. _d 0
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte 
CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte 
#endif /* ALLOW_AUTODIFF_TAMC */
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_s(I,J,bi,bj) = 0.
          cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
     &    (aW2d(I  ,J  ,bi,bj)*cg2d_x(I-1,J  ,bi,bj)
     &    +aW2d(I+1,J  ,bi,bj)*cg2d_x(I+1,J  ,bi,bj)
     &    +aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J-1,bi,bj)
     &    +aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J+1,bi,bj)
     &    -aW2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -aW2d(I+1,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* 
     &     cg2d_x(I  ,J  ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm
CML     &     cg2d_x(I  ,J  ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm
     &    )
          err_sq         = err_sq         + 
     &     cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
          sumRHS            = sumRHS            +
     &     cg2d_b(I,J,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

#ifdef LETS_MAKE_JAM
      CALL EXCH_XY_O1_R8_JAM( cg2d_r )
      CALL EXCH_XY_O1_R8_JAM( cg2d_s )
#else
      _EXCH_XY_R8( cg2d_r, myThid )
      _EXCH_XY_R8( cg2d_s, myThid )
#endif
       _GLOBAL_SUM_R8( sumRHS, myThid )
       _GLOBAL_SUM_R8( err_sq, myThid )
       if ( err_sq .ne. 0. ) then
          err = SQRT(err_sq)
       else
          err = 0.
       end


if actualIts = 0 actualResidual = err _BEGIN_MASTER( myThid ) write(standardMessageUnit,'(A,1P2E22.14)') & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMaxGlobal _END_MASTER( ) C _BARRIER c _BEGIN_MASTER( myThid ) c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', c & actualIts, actualResidual c _END_MASTER( ) firstResidual=actualResidual cg2dTolerance_sq = cg2dTolerance**2 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Cml begin main solver loop #if ((defined ALLOW_AUTODIFF_TAMC) (defined ALLOW_LOOP_DIRECTIVE)) CADJ LOOP = iteration, cg2d_x = comlev_cg2d_iter #endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */ DO it2d=1, numIters #ifdef ALLOW_LOOP_DIRECTIVE CML it2d = 0 CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters ) CML it2d = it2d+1 #endif /* ALLOW_LOOP_DIRECTIVE */ #ifdef ALLOW_AUTODIFF_TAMC icg2dkey = (ikey-1)*numItersMax + it2d CMLCADJ STORE err = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ CML IF ( err .LT. cg2dTolerance ) THEN IF ( err_sq .LT. cg2dTolerance_sq ) THEN Cml DO NOTHING ELSE CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' residual = ', C & actualResidual CcnhDebugEnds C-- Solve preconditioning equation and update C-- conjugate direction vector "s". eta_qrN = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx cg2d_z(I,J,bi,bj) = & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj) & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj) & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj) & +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj) & +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj) CcnhDebugStarts C cg2d_z(I,J,bi,bj) = cg2d_r(I ,J ,bi,bj) CcnhDebugEnds eta_qrN = eta_qrN & +cg2d_z(I,J,bi,bj)*cg2d_r(I,J,bi,bj) ENDDO ENDDO ENDDO ENDDO _GLOBAL_SUM_R8(eta_qrN, myThid) CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' eta_qrN = ',eta_qrN CcnhDebugEnds #ifdef ALLOW_AUTODIFF_TAMC CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ CML cgBeta = eta_qrN/eta_qrNM1 cgBeta = eta_qrN*recip_eta_qrNM1 CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' beta = ',cgBeta CcnhDebugEnds Cml store normalisation factor for the next interation Cml (in case there is one). CML store the inverse of the normalization factor for higher precision CML eta_qrNM1 = eta_qrN recip_eta_qrNM1 = 1./eta_qrN DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx cg2d_s(I,J,bi,bj) = cg2d_z(I,J,bi,bj) & + cgBeta*cg2d_s(I,J,bi,bj) ENDDO ENDDO ENDDO ENDDO C-- Do exchanges that require messages i.e. between C-- processes. #ifdef LETS_MAKE_JAM CALL EXCH_XY_O1_R8_JAM( cg2d_s ) #else _EXCH_XY_R8( cg2d_s, myThid ) #endif C== Evaluate laplace operator on conjugate gradient vector C== q = A.s alpha = 0. _d 0 alpha_aux = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC #ifndef ALLOW_LOOP_DIRECTIVE CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* not ALLOW_LOOP_DIRECTIVE */ #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx cg2d_q(I,J,bi,bj) = & aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj) & +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj) & +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj) & +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj) & -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) & -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) & -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) & -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj) & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm CML & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm alpha_aux = alpha_aux+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj) ENDDO ENDDO ENDDO ENDDO _GLOBAL_SUM_R8(alpha_aux,myThid) CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' SUM(s*q)= ',alpha_aux CcnhDebugEnds alpha = eta_qrN/alpha_aux CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' alpha= ',alpha CcnhDebugEnds C== Update solution and residual vectors C Now compute "interior" points. err = 0. _d 0 err_sq = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC #ifndef ALLOW_LOOP_DIRECTIVE CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_LOOP_DIRECTIVE */ #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj) cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj) err_sq = err_sq+cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj) ENDDO ENDDO ENDDO ENDDO _GLOBAL_SUM_R8( err_sq , myThid ) if ( err_sq .ne. 0. ) then err = SQRT(err_sq) else err = 0. end


if actualIts = it2d actualResidual = err #ifdef LETS_MAKE_JAM CALL EXCH_XY_O1_R8_JAM( cg2d_r ) #else _EXCH_XY_R8( cg2d_r, myThid ) _EXCH_XY_R8( cg2d_x, myThid ) #endif Cml end of IF ( err .LT. cg2dTolerance ) THEN; ELSE ENDIF Cml end main solver loop ENDDO IF (cg2dNormaliseRHS) THEN #ifdef ALLOW_AUTODIFF_TAMC CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ C-- Un-normalise the answer DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm ENDDO ENDDO ENDDO ENDDO ENDIF C The following exchange was moved up to solve_for_pressure C for compatibility with TAMC. C _EXCH_XY_R8(cg2d_x, myThid ) c _BEGIN_MASTER( myThid ) c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', c & actualIts, actualResidual c _END_MASTER( ) C-- Return parameters to caller lastResidual=actualResidual numIters=actualIts #endif /* ALLOW_CG2D_NSA */ RETURN END


# if ((defined ALLOW_AUTODIFF_TAMC) (defined ALLOW_LOOP_DIRECTIVE)) C C These routines are routinely part of the TAMC/TAF library that is C not included in the MITcgm, therefore they are mimicked here. C subroutine ADSTORE(chardum,int1,idow,int2,int3,icount) implicit none #include "SIZE.h" #include "tamc.h" character*(*) chardum integer int1, int2, int3, idow, icount C the length of this vector must be greater or equal C twice the number of timesteps integer nidow #ifdef ALLOW_TAMC_CHECKPOINTING parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 ) #else parameter ( nidow = 1000000 ) #endif /* ALLOW_TAMC_CHECKPOINTING */ integer istoreidow(nidow) common /istorecommon/ istoreidow print *, 'adstore: ', chardum, int1, idow, int2, int3, icount if ( icount .gt. nidow ) then print *, 'adstore: error: icount > nidow = ', nidow stop 'ABNORMAL STOP in adstore' endif istoreidow(icount) = idow return end


subroutine ADRESTO(chardum,int1,idow,int2,int3,icount) implicit none #include "SIZE.h" #include "tamc.h" character*(*) chardum integer int1, int2, int3, idow, icount C the length of this vector must be greater or equal C twice the number of timesteps integer nidow #ifdef ALLOW_TAMC_CHECKPOINTING parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 ) #else parameter ( nidow = 1000000 ) #endif /* ALLOW_TAMC_CHECKPOINTING */ integer istoreidow(nidow) common /istorecommon/ istoreidow print *, 'adresto: ', chardum, int1, idow, int2, int3, icount if ( icount .gt. nidow ) then print *, 'adstore: error: icount > nidow = ', nidow stop 'ABNORMAL STOP in adstore' endif idow = istoreidow(icount) return end


# endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */