C $Header: /u/gcmpack/MITgcm/eesupp/src/check_threads.F,v 1.16 2012/03/30 14:18:07 jmc Exp $ C $Name: $ #include "CPP_EEOPTIONS.h" CBOP C !ROUTINE: CHECK_THREADS C !INTERFACE: SUBROUTINE CHECK_THREADS( myThid ) C !DESCRIPTION: C *========================================================== C | SUBROUTINE CHECK\_THREADS C | o Check that all the threads we need did indeed start. C *========================================================== C | This routine is called during the initialisation phase C | to check whether all the threads have started. C | It is invoked by every thread and if any thread finds an C | error it should set its error flag. C | Notes: C | Different mechanisms may be required on different C | platforms to actually perform the check. For example as C | coded here each thread checks for a semaphore set by the C | other threads to see if they are running. C | It is also possible for a system to schedule threads C | sequentially, unless some system call is made to yield C | the process. This routine would detect this situation too C | and allow a programmer to modify this routine and the C | barrier code to allow threads to be scheduled more C | appropriately. C *========================================================== C !USES: IMPLICIT NONE C == Global variables == #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" C !INPUT PARAMETERS: C == Routine arguments == C myThid :: My thread number INTEGER myThid C !FUNCTIONS: #ifdef USE_OMP_THREADING INTEGER OMP_GET_NUM_THREADS EXTERNAL #endif C !LOCAL VARIABLES: C == Local variables == C I :: Loop counter C numberThreadRunning :: Count of number of threads this thread C thinks are running. C nChecks :: Number of times checked for all threads. After so C many checks give up and report an error. C msgBuf :: Informational/error message buffer INTEGER nChecks CHARACTER*(MAX_LEN_MBUF) msgBuf #ifdef USE_OMP_THREADING #ifdef ALLOW_USE_MPI INTEGER myErr, mpiRC #endif #else /* USE_OMP_THREADING */ INTEGER I, numberThreadsRunning #endif /* USE_OMP_THREADING */ CEOP C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| #ifdef USE_OMP_THREADING C-- Check early-on that number of threads match IF ( OMP_GET_NUM_THREADS() .NE. nThreads ) THEN C- This process has problems in multi-threads setting (detected by C all pseudo-threads); note: cannot use any Barrier in this context WRITE(msgBuf,'(2A,I6)') 'CHECK_THREADS:', & ' from "eedata", nThreads=', nThreads CALL PRINT_ERROR( msgBuf, myThid ) WRITE(msgBuf,'(2A,I6)') ' not equal to ', & 'Env.Var. OMP_NUM_THREADS=', OMP_GET_NUM_THREADS() CALL PRINT_ERROR( msgBuf, myThid ) thError(myThid) = .TRUE. eeBootError = .TRUE. IF ( myThid.EQ.1 ) THEN C- one pseudo-thread (thId=1) export the error to other MPI processes nChecks = 1 #ifdef ALLOW_USE_MPI IF ( usingMPI ) THEN myErr = nChecks CALL MPI_ALLREDUCE( myErr,nChecks,1,MPI_INTEGER, & MPI_SUM,MPI_COMM_MODEL,mpiRC ) ENDIF #endif /* ALLOW_USE_MPI */ ENDIF ELSE C- this process has a working multi-threads setting threadIsRunning(myThid) = .TRUE. IF ( myThid.EQ.1 ) THEN C- master collects error from other MPI processes nChecks = 0 #ifdef ALLOW_USE_MPI IF ( usingMPI ) THEN myErr = nChecks CALL MPI_ALLREDUCE( myErr,nChecks,1,MPI_INTEGER, & MPI_SUM,MPI_COMM_MODEL,mpiRC ) ENDIF #endif /* ALLOW_USE_MPI */ IF ( nChecks.NE.0 ) THEN WRITE(msgBuf,'(A,I5,A)') 'CHECK_THREADS:', nChecks, & ' error(s) from other Processes' CALL PRINT_ERROR( msgBuf, myThid ) eeBootError = .TRUE. ENDIF ENDIF C- ensure all threads leave with updated eeBootError (shared) value C$OMP BARRIER ENDIF #else /* ndef USE_OMP_THREADING */ threadIsRunning(myThid) = .TRUE. nChecks = 0 10 CONTINUE numberThreadsRunning = 0 nChecks = nChecks + 1 DO I = 1, nThreads IF ( threadIsRunning(I) ) & numberThreadsRunning = numberThreadsRunning+1 ENDDO IF ( nChecks .GT. 10 ) THEN thError(myThid) = .TRUE. eeBootError = .TRUE. WRITE(msgBuf,'(A,I5,A,I5,A)') & 'CHECK_THREADS: Only ',numberThreadsRunning, & ' thread(s), ',nThreads,' are needed for this config!' CALL PRINT_ERROR( msgBuf, myThid ) C-- Not enough threads are running so halt the program. C I did not want this here but it is the only place I have found that C KAP deadlocks if it there are fewer threads than iterations in a BLOCKED(1) C loop. The deadlock appears to be in the routine mppjoin which never C returns. I tried putting the STOP in main or breaking out of the loop in main C but this causes KAP to insert a call to mppjoin - which then deadlocks! IF ( myThid .EQ. 1 ) THEN STOP 'ABNORMAL END: S/R CHECK_THREADS' ENDIF GOTO 11 ENDIF IF ( numberThreadsRunning .NE. nThreads ) THEN #ifdef HAVE_SYSTEM CALL SYSTEM('sleep 1') #endif GOTO 10 ENDIF 11 CONTINUE #endif /* ndef USE_OMP_THREADING */ C-- check barrier synchronization: 1rst (initial) call. IF ( .NOT. eeBootError ) THEN CALL BAR_CHECK( 1, myThid ) ENDIF RETURN END