#!/bin/sh

# Helper script to flag done LoadLeveler jobs.
# The script is called periodically by the grid-manager.
#

# ARC1 passes the configuration file first, as "--config <file>".
# Consume both words so that the control directory becomes $1.
if [ "$1" = "--config" ]; then
  shift
  ARC_CONFIG=$1
  shift
fi

# Absolute path of the directory holding this script.  The expansions are
# quoted so that an installation path containing whitespace still resolves;
# cd's output is discarded because cd may print the directory it entered
# (e.g. when CDPATH is set), which would pollute the captured value.
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?

libexecdir="${ARC_LOCATION:-/usr/local}/libexec/arc/"
pkgdatadir="$basedir"

# Assume that gm-kick is installed in the same directory
GMKICK=${libexecdir}/gm-kick

# The first remaining argument is the control directory; give up with
# status 1 if it does not exist.
control_dir="$1"
test -d "$control_dir" || exit 1

# Get LoadLeveler environment
. "${pkgdatadir}/configure-ll-env.sh" || exit $?

# Shared helpers for the scan scripts.
# NOTE(review): the functions used further down (log, do_as_uid,
# save_commentfile, job_read_diag, ...) are presumably defined by the files
# sourced here -- confirm.
. "${pkgdatadir}/scan_common.sh" || exit $?


# Numeric uid of the user running this scan; compared against 0 (root)
# when deciding whether job file ownership has to be checked.
my_id=$(id -u)

# ll_exceeded_time_limit CODE
# Succeeds when CODE is one of the exit codes this script treats as
# "job ran into its time limit" (152 or 24), fails otherwise.
# Both operands are quoted, so a multi-word or multi-line CODE simply fails
# the comparison instead of making the test itself error out.
# NOTE(review): 152 is presumably 128 + 24 (terminated by signal 24) -- confirm.
ll_exceeded_time_limit() {
  [ "$1" = "152" ] || [ "$1" = "24" ]
}

# Walk over every job that has a status file in <control_dir>/processing and
# create job.<id>.lrms_done for those that LoadLeveler no longer runs.
# Each status file path is rewritten by sed into the path of the matching
# <control_dir>/job.<id>.local file, which is what the loop iterates over.
# IFS= and -r keep the path exactly as find printed it.
find "$control_dir/processing" -maxdepth 1 -type f -name 'job.*.status' \
| sed 's/processing\/job\.\([^\.]*\)\.status$/job.\1.local/' \
| while IFS= read -r i
do
  # Skip anything that did not map to an existing .local file
  test -f "$i" || continue

  # job.<id>.local -> <id>
  jobid=${i##*/}
  jobid=${jobid%.local}
  jobid=${jobid#job.}

  donefile="${control_dir}/job.${jobid}.lrms_done"
  statusfile="${control_dir}/processing/job.${jobid}.status"
  jobfile="${control_dir}/job.${jobid}.local"
  errorsfile="${control_dir}/job.${jobid}.errors"

  # Nothing to do if the job is already flagged as done
  test -f "$donefile" && continue

  # Only jobs whose status file says INLRMS or CANCELING are examined
  test -f "$statusfile" || continue
  gmstatus=$(cat "$statusfile")
  case "$gmstatus" in
    INLRMS|CANCELING) ;;
    *) continue ;;
  esac

  # Get local LRMS id of job by evaluating the line with localid.
  # NOTE(review): eval executes text read from the job's .local file as
  # shell code; this is only safe while that file is written exclusively
  # by trusted components -- confirm, or replace with plain text parsing.
  localid=$(grep '^localid=' "$i" | head -1)
  eval "$localid"

  # Did we get a local id?
  test -n "$localid" || continue

  # Query LoadLeveler once per job and reuse the output below.
  llq_out=$("$LL_BIN_PATH/llq" -l "$localid")

  # Get job status.  When llq prints no Status line at all, the job is
  # processed further; when it does, only "Completed" jobs are.
  if status=$(printf '%s\n' "$llq_out" | grep '^ *Status')
  then
    # The unquoted expansion is intentional: word splitting trims and
    # collapses whitespace around the value before it is matched.
    # shellcheck disable=SC2086
    status=$(echo $status | sed 's/^ *Status: //')
    case "$status" in
      Completed) ;;
      Canceled) continue ;;
      *) continue ;;
    esac
  fi

  exitcode=''

  # get session directory of this job
  sessiondir=$(grep -h '^sessiondir=' "$jobfile" | sed 's/^sessiondir=\(.*\)/\1/')
  diagfile="${sessiondir}.diag"
  commentfile="${sessiondir}.comment"

  # When not running as root, only handle job files owned by this user
  if [ "$my_id" != '0' ] && [ ! -O "$jobfile" ] ; then continue ; fi
  uid=$(get_owner_uid "$jobfile")
  [ -z "$uid" ] && { log "Failed to stat $jobfile"; continue; }

  # Without a session directory there is no diag file to read
  [ -n "$sessiondir" ] || continue

  # have chance to obtain exit code
  exitcode=$(do_as_uid "$uid" "grep '^exitcode=' '$diagfile'" | sed 's/^exitcode=//')

  if [ -z "$exitcode" ] ; then
    # job finished but the diag file yielded no exit code
    exitcode=-1
    save_commentfile "$uid" "$commentfile" "$errorsfile"
    echo "$exitcode Job finished with unknown exit code" >> "$donefile"
  elif ll_exceeded_time_limit "$exitcode" ; then
    exitcode="24"
    save_commentfile "$uid" "$commentfile" "$errorsfile"
    echo "$exitcode Job exceeded time limit." > "$donefile"

    # If job exceeded time, then it will have been killed and no
    # cputime/walltime has been written; use the hard limits and the
    # timestamps reported by llq instead.
    walltime=$(printf '%s\n' "$llq_out" \
      | sed -n 's/^ *Wall Clk Hard Limit:.*(\([0-9]*\) seconds.*/\1/p')
    usertime=$(printf '%s\n' "$llq_out" \
      | sed -n 's/^ *Step Cpu Hard Limit:.*(\([0-9]*\) seconds.*/\1/p')
    starttime=$(printf '%s\n' "$llq_out" \
      | sed -n 's/^ *Dispatch Time: \(.*\)/\1/p')
    endtime=$(printf '%s\n' "$llq_out" \
      | sed -n 's/^ *Completion Date: \(.*\)/\1/p')

    # The conversion helpers hand their results back through the
    # return_date_seconds / return_mds_date variables.
    if [ -n "$starttime" ]; then
      date_to_utc_seconds "$starttime"
      seconds_to_mds_date "$return_date_seconds"
      starttime=$return_mds_date
    fi
    if [ -n "$endtime" ]; then
      date_to_utc_seconds "$endtime"
      seconds_to_mds_date "$return_date_seconds"
      endtime=$return_mds_date
    fi

    job_read_diag

    # Override only the values llq actually provided
    [ -n "$walltime" ] && WallTime=${walltime}
    [ -n "$usertime" ] && UserTime=${usertime}
    [ -n "$usertime" ] && KernelTime=0
    [ -n "$starttime" ] && LRMSStartTime=${starttime}
    [ -n "$endtime" ] && LRMSEndTime=${endtime}
    #This needs investigating, might be user program exit code
    [ -n "$exitcode" ] && LRMSExitcode=$exitcode

    job_write_diag
  else
    # job finished and exit code is known
    save_commentfile "$uid" "$commentfile" "$errorsfile"
    echo "$exitcode Executable finished with exit code $exitcode" >> "$donefile"
  fi

  # Run gm-kick on the job file once the done file (and diag) are written
  "${GMKICK}" "$jobfile"
done

# Pause for one minute before returning to the caller.
# NOTE(review): presumably this throttles how often the grid-manager can
# re-run the scan -- confirm against the caller.
sleep 60
# Reaching this point always reports success; per-job problems inside the
# loop above only skip the affected job.
exit 0
