#! /usr/bin/env bash
#
# This is a shell script to combine multiple MITgcm mnc output files from
# different tiles into one global file.
# All of the variables should be in one directory, where this script is run.
#
# To combine all state.0000000000.t*.nc files,
#  gluemnc state.0000000000.*.nc
# This will result in an output file state.0000000000.glob.nc
# Where glob is for global.
#
# You can even combine all mnc files at once; use
#  gluemnc *.nc
# This will result in a series of global files,
# state.0000000000.glob.nc state.0000000100.glob.nc, ...
# grid.0000000000.glob.nc grid.0000000100.glob.nc, ...
# diag.0000000000.glob.nc diag.0000000100.glob.nc, ...
#
# A lot of hard drive activity is needed.  If you have a fast drive
# export TMPDIR=<path of hard drive>.  On some high-performance
# systems, this is already done for you.
#
# **********WARNINGS**********
# This will probably not work at all with exch2/cubed sphere.
# In that case, you probably can assemble all of the tiles on a face,
# but combining faces is currently not implemented.
#
# Be sure you have enough disk space for the copies!  In this version
# nothing is done to ensure that all of the data has been copied.
#
# Be careful!  It will be easy to exceed the 2 GB limit for the old 32-bit
# version of netcdf.  If you do not have large-file support or 64-bit netcdf
# you will have to be clever in dividing up your tiled files,
# e.g., along the time dimension before combining to global files.
# The nco operator ncks is adept at shortening files to fewer snapshots.
# *****************************
#
# Good luck and happy gluing,
# Baylor Fox-Kemper

# Options shared by every NCO call below: the verbosity flag and the file
# that receives the tools' standard output.
DEBUG="--dbg_lvl=0"
LOGFILE="/dev/null"

# Refuse to run without input files *before* anything is created on disk;
# checking later would leave an empty scratch directory behind.
inone=${1:?"You must input mnc filenames to be glued"}

DIRORIG=$(pwd)

# Scratch space goes to $TMPDIR when the caller set it, otherwise to the
# directory the script was started from.
if [ -z "$TMPDIR" ]; then
  TMPDIR=$DIRORIG
fi

# mktemp creates a fresh directory under a name that did not exist before,
# so an older gluedir is never reused (and later deleted) by accident.
DIRNAME=$(mktemp -d "$TMPDIR/gluedir.XXXXXX") || {
  echo "Error: cannot create a temporary directory in $TMPDIR" >&2
  exit 1
}
# Exported as before; whether xplodemnc relies on it is not visible here.
export DIRNAME

# Remove the scratch directory on every exit path, including error exits.
cleanup_gluedir() {
  cd "$DIRORIG" 2>/dev/null
  rm -rf -- "$DIRNAME"
}
trap cleanup_gluedir EXIT

echo "Using temporary directory $DIRNAME"

# Prefer an xplodemnc sitting next to the data, else the one on PATH.
if [ -f xplodemnc ]; then
  cp xplodemnc "$DIRNAME" || exit 1
else
  xplode_path=$(command -v xplodemnc) || {
    echo "Error: xplodemnc not found in $DIRORIG or on PATH" >&2
    exit 1
  }
  cp "$xplode_path" "$DIRNAME" || exit 1
fi

# find an unambiguous name for a new record dimension: "record" followed by
# the random suffix of the scratch directory (the text after its last dot)
myrecname=record${DIRNAME##*.}

cd "$DIRNAME" || exit 1

# Link every input file into the scratch directory and make sure each link
# resolves to a non-empty file.
for somefile in "$@"; do
  ln -s "$DIRORIG/$somefile" .
  if [ ! -s "$somefile" ]; then
    echo "Error: $somefile is missing or empty" >&2
    exit 1
  fi
done

# unique_words WORD...
# Print the distinct WORDs in order of first appearance, separated by
# single spaces.  Prints an empty line when called without arguments.
unique_words() {
  local seen= word
  for word in "$@"; do
    # The quoted pattern is matched literally, so glob characters in a
    # word cannot produce false hits.
    case " $seen " in
      *" $word "*) ;;
      *) seen=${seen:+$seen }$word ;;
    esac
  done
  printf '%s\n' "$seen"
}

# Strip the tile suffix (".tNNN.nc") from every input file name and keep
# each resulting prefix once; $prels is the space-separated list of
# output prefixes to build.
prels=$(unique_words "${@%.t???.nc}")

# ML: determine the coordinate variables.  This is a hack for the unlikely
# case that we do not have X or Y as coordinate variables; this can happen
# when only U-point or V-point variables are written to a diagnostics
# stream.  I do not know if this always works, but it works for me.
#
# Sets Xcoord to X or Xp1 and Ycoord to Y or Yp1, whichever is found in
# tile 001 of the first prefix, and aborts when either direction has none.
echo Determine a usable coordinate variable

# $prels may list several prefixes (e.g. after "gluemnc *.nc"), so pick
# the first one instead of pasting the whole list into a file name.
somefile=
for somepre in $prels; do
  somefile=$somepre.t001.nc
  break
done

# first try X and Y
Xcoord=X
Ycoord=Y
Xtest=$(ncdump -vX -l 10000 "$somefile" | grep "X = ")
Ytest=$(ncdump -vY -l 10000 "$somefile" | grep "Y = ")
if [ -z "$Xtest" ]; then
    echo "X not found, trying Xp1"
    Xtest=$(ncdump -vXp1 -l 10000 "$somefile" | grep "Xp1 = ")
    Xcoord=Xp1
fi
if [ -z "$Xtest" ]; then
    echo "no X-coordinate found"
    Xcoord=
fi
if [ -z "$Ytest" ]; then
    echo "Y not found, trying Yp1"
    Ytest=$(ncdump -vYp1 -l 10000 "$somefile" | grep "Yp1 = ")
    Ycoord=Yp1
fi
if [ -z "$Ytest" ]; then
    echo "no Y-coordinate found"
    Ycoord=
fi

# Without a coordinate in both directions the tiles cannot be placed.
# Exit non-zero so that callers can detect the failure.
if [ -z "$Xcoord" ] || [ -z "$Ycoord" ]; then
    echo cannot continue
    exit 1
fi

# ----------------------------------------------------------------------
# Helpers for the gluing loop below.  Each one only reads netCDF
# metadata through ncdump.
# ----------------------------------------------------------------------

# coord_key VAR FILE
# Print the values of coordinate variable VAR stored in FILE as a single
# token without whitespace (e.g. "X=0.5,1.5").  Tiles with identical
# coordinate values produce identical tokens, so the token is used as a
# key for grouping tiles into columns and rows.  Prints nothing when VAR
# is not found.
coord_key() {
  local vals
  # The grep is expected to return the dimension declaration
  # ("X = 10 ;") followed by the data line ("X = 0.5, 1.5 ;").
  vals=$(ncdump -v"$1" -l 10000 "$2" | grep "$1 = ")
  vals=${vals#*;}   # drop everything up to and including the first ';'
  vals=${vals%;*}   # drop the last ';' and whatever follows it
  printf '%s' "$vals" | tr -d ' \t\n'
}

# record_dim FILE
# Print the name of the UNLIMITED (record) dimension declared in the
# header of FILE; print nothing when the file has none.
record_dim() {
  ncdump -h "$1" \
    | sed -n 's/\([a-z,A-Z]*\) = \(UNLIMITED\) .*/\1/p' \
    | tr -d ' \t'
}

# dim_decl FILE DIM
# Print the header line(s) of FILE that contain "DIM =", i.e. the
# declaration of dimension DIM; print nothing when there is none.
dim_decl() {
  ncdump -h "$1" | grep "$2 ="
}

# ----------------------------------------------------------------------
# Main loop: build <prefix>.glob.nc for every prefix in $prels.
#
# Reads:  $prels, $Xcoord, $Ycoord, $myrecname, $DEBUG, $LOGFILE,
#         $DIRORIG and the positional parameters (the tile files).
# Steps:  1. scan the tiles and group them into X columns,
#         2. split each tile into per-variable files (xplodemnc) and
#            turn Y/Yp1 into the record dimension,
#         3. concatenate each column along Y,
#         4. turn X/Xp1 into the record dimension and concatenate the
#            columns along X,
#         5. merge all variables into <prefix>.glob.nc and copy it to
#            $DIRORIG.
# $DEBUG is left unquoted on purpose so that it may hold several flags.
# ----------------------------------------------------------------------
for somepre in $prels; do
  echo "Making $somepre.glob.nc..."

  # --- 1. Scan: remember each tile of this prefix and its X key, so that
  #        ncdump does not have to be run again for every X column.
  tilefiles=()
  tileXkeys=()
  Xsls=
  Ysls=
  for somefile in "$@"; do
    [ "${somefile%.t???.nc}" = "$somepre" ] || continue
    echo "Scanning $somefile..."
    Xs=$(coord_key "$Xcoord" "$somefile")
    Ys=$(coord_key "$Ycoord" "$somefile")
    Xsls=$Xsls$Xs" "
    Ysls=$Ysls$Ys" "
    tilefiles[${#tilefiles[@]}]=$somefile
    tileXkeys[${#tileXkeys[@]}]=$Xs
  done

  # Determine all the X locations (distinct keys, first-seen order)
  sXsls=
  countx=0
  for someXs in $Xsls; do
    case " $sXsls" in
      *" $someXs "*) ;;
      *)
        sXsls=$sXsls$someXs" "
        countx=$((countx + 1))
        ;;
    esac
  done
  echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  echo "$countx tiles found in x-direction."
  echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

  # Determine all the Y locations (only needed for the report)
  sYsls=
  county=0
  for someYs in $Ysls; do
    case " $sYsls" in
      *" $someYs "*) ;;
      *)
        sYsls=$sYsls$someYs" "
        county=$((county + 1))
        ;;
    esac
  done
  echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
  echo "$county tiles found in y-direction."
  echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY

  # --- 2. Prepare: X columns are numbered 1001, 1002, ... so that the
  #        four-digit tag sorts correctly in file names.
  countxx=1000
  cntls=
  for someX in $sXsls; do
    countxx=$((countxx + 1))
    cntls=$cntls$countxx" "
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Prepping X tile $((countxx - 1000))"
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

    for ((tile = 0; tile < ${#tilefiles[@]}; tile++)); do
      # Only tiles that belong to the current X column.  Both sides are
      # quoted, so a tile without a usable X key simply does not match.
      [ "$someX" = "${tileXkeys[tile]}" ] || continue
      somefile=${tilefiles[tile]}

      ./xplodemnc "$somefile"
      if [ -f "iter.$somefile" ]; then
        mv "iter.$somefile" "iter.${somefile%t???.nc}glob.nc"
      fi

      # The glob is expanded once, before any i<column>.* file is created.
      for somesplit in *."$somefile"; do
        [ -e "$somesplit" ] || continue

        # Added to account for grid files with no T dimension defined:
        # look up the unlimited dimension; if there is none, add one.
        recname=$(record_dim "$somesplit")
        if [[ -z "$recname" ]]; then
          echo "No record dimension found, adding one now: $myrecname"
          ncecat $DEBUG -O -u "$myrecname" "$somesplit" "$somesplit" > "$LOGFILE"
          recname=$myrecname
        fi

        withY=$(dim_decl "$somesplit" Y)
        if [ ${#withY} -gt 1 ]; then
          echo "Changing Y to record variable in $somesplit"
          ncpdq $DEBUG -O -a Y,"$recname" "$somesplit" "$somesplit" > "$LOGFILE"
          mv "$somesplit" "i$countxx.$somesplit"
        fi

        # Still present means it was not a Y file; try Yp1 instead.
        if [ -f "$somesplit" ]; then
          withYp1=$(dim_decl "$somesplit" Yp1)
          if [ ${#withYp1} -gt 1 ]; then
            Yp1len=${withYp1#*= }
            Yp1len=$((${Yp1len% ;} - 1))
            #  Strip off the repeated value in Yp1
            echo "Changing Yp1 to record variable in $somesplit"
            ncpdq $DEBUG -O -a Yp1,"$recname" -F -d Yp1,1,"$Yp1len" "$somesplit" "$somesplit" > "$LOGFILE"
            mv "$somesplit" "i$countxx.$somesplit"
          fi
        fi
      done
    done
  done
  echo "Tile names $cntls"

  # --- 3. Combine every X column along Y.
  for countxx in $cntls; do
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Combining X tile $((countxx - 1000))"
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

    # Collect the variable names of this column.  Membership is checked
    # against the whole list, so the result does not depend on sort order.
    varls=
    for somefile in i"$countxx".*; do
      [ -e "$somefile" ] || continue
      varname=$(printf '%s\n' "$somefile" \
                | sed 's/^i[0-9][0-9][0-9][0-9]\.//; s/\..*nc//')
      case " $varls" in
        *" $varname "*) echo "$varname repeated" ;;
        *) varls=$varls$varname" " ;;
      esac
    done

    echo Found these variables to combine: $varls

    for somevar in $varls; do
      echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
      echo "Combining $somevar files in Y"
      echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY

      # Inspect the first input now; the inputs are removed right after
      # the concatenation.
      filelist=(i"$countxx"."$somevar"."$somepre".*.nc)
      withY=$(dim_decl "${filelist[0]}" Y)
      withYp1=$(dim_decl "${filelist[0]}" Yp1)

      gloyfile=$somevar.$somepre.gloy.$countxx.nc
      ncrcat $DEBUG i"$countxx"."$somevar"."$somepre".*.nc "$gloyfile" > "$LOGFILE"
      echo "Just combined $countxx.$somevar"
      rm i"$countxx"."$somevar"."$somepre".t???.nc

      # Recover the name of the record variable, there are two
      # possibilities: T (MITgcm convention) and $myrecname.
      if ncdump -h "$gloyfile" | grep -q -- "$myrecname"; then
        recname=$myrecname
      else
        recname=T
      fi

      if [ ${#withY} -gt 1 ]; then
        echo "Changing $recname to record variable in $gloyfile"
        ncpdq $DEBUG -O -a "$recname",Y "$gloyfile" "$gloyfile" > "$LOGFILE"
      fi

      if [ ${#withYp1} -gt 1 ]; then
        echo "Changing $recname to record variable in $gloyfile"
        ncpdq $DEBUG -O -a "$recname",Yp1 "$gloyfile" "$gloyfile" > "$LOGFILE"
      fi
    done
  done

  # --- 4. Combine the columns along X.  $varls is the variable list of
  #        the last column processed above.
  for somevar in $varls; do
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Combining $somevar files in X..."
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    for somegloy in "$somevar"."$somepre".gloy.*.nc; do
      [ -e "$somegloy" ] || continue
      withX=$(dim_decl "$somegloy" X)
      withXp1=$(dim_decl "$somegloy" Xp1)
      recname=$(record_dim "$somegloy")

      if [ ${#withX} -gt 1 ]; then
        echo "Changing X to record variable in $somegloy"
        ncpdq $DEBUG -O -a X,"$recname" "$somegloy" "$somegloy" > "$LOGFILE"
      fi

      if [ ${#withXp1} -gt 1 ]; then
        Xp1len=${withXp1#*= }
        Xp1len=$((${Xp1len% ;} - 1))
        #  Strip off the repeated value in Xp1
        echo "Changing Xp1 to record variable in $somegloy"
        ncpdq $DEBUG -O -a Xp1,"$recname" -F -d Xp1,1,"$Xp1len" "$somegloy" "$somegloy" > "$LOGFILE"
      fi
    done

    globvar=$somevar.$somepre.glob.nc
    echo "Combining $somevar.gloy files..."
    ncrcat $DEBUG "$somevar"."$somepre".gloy.*.nc "$globvar" > "$LOGFILE"
    # The .gloy files are left in place; they live in the scratch directory.

    # withX, withXp1 and recname still hold the values of the last .gloy
    # file, so change back without testing again
    if [ ${#withX} -gt 1 ]; then
      echo "Changing $recname to record variable in $globvar"
      ncpdq $DEBUG -O -a "$recname",X "$globvar" "$globvar" > "$LOGFILE"
    fi

    if [ ${#withXp1} -gt 1 ]; then
      echo "Changing $recname to record variable in $globvar"
      ncpdq $DEBUG -O -a "$recname",Xp1 "$globvar" "$globvar" > "$LOGFILE"
    fi

    if [ "$recname" = "$myrecname" ]; then
      # only for variables that did not have a record dimension to begin with
      echo "removing record dimension $recname from $globvar"
      ncwa $DEBUG -O -a "$recname" "$globvar" "$globvar" > "$LOGFILE"
    fi

    # --- 5. Append this variable to the global file of the prefix.
    ncks $DEBUG -A "$globvar" "$somepre.glob.nc" > "$LOGFILE"
  done

  if [ -f "iter.$somepre.glob.nc" ]; then
    ncks $DEBUG -A "iter.$somepre.glob.nc" "$somepre.glob.nc" > "$LOGFILE"
  fi

  # another hack by rpa to accommodate grid.nc files
  # (there are several variables with just Z dimension that we want to keep)
  # The OR ("\|") and "\s", "\w" only work for GNU-sed, but not for
  # BSD-sed or SunOS-sed, therefore the filtering is split over grep and a
  # plain sed expression.  "grep -E" replaces the obsolescent egrep.
  varsz=$(ncdump -h "$somepre.t001.nc" | grep -E "double|float" \
          | grep -v , | sed -n 's/.* \(.*\)(Z.*).*/\1/p')
  for varz in $varsz; do
    # check to make sure the variable does not already exist in the glob file
    if [[ -z $(ncdump -h "$somepre.glob.nc" | grep " $varz(") ]]; then
      echo "Adding variable $varz to $somepre.glob.nc"
      ncks $DEBUG -A -v "$varz" "$somepre.t001.nc" "$somepre.glob.nc" > "$LOGFILE"
    fi
  done

  cp "$somepre.glob.nc" "$DIRORIG"
done


# remove_gluedir
# Return to the directory the script was started from ($DIRORIG) and
# delete the scratch directory ($DIRNAME).  Both paths are quoted so that
# a path containing blanks is treated as one name; nothing is deleted
# when $DIRNAME is empty or is not a directory.
remove_gluedir() {
  if [ -n "$DIRORIG" ]; then
    cd "$DIRORIG" || echo "Warning: cannot return to $DIRORIG" >&2
  fi
  if [ -n "$DIRNAME" ] && [ -d "$DIRNAME" ]; then
    rm -rf -- "$DIRNAME"
  fi
}

remove_gluedir

