#!/bin/bash
# set -x
#
#  Based on globus submission script for pbs
#
#  Submits job to SLURM.
#  Input: path to grami file (same as Globus).
#
# The temporary job script is created for the submission and then removed 
# at the end of this script. 

echo "----- starting submit_slurm_job -----" 1>&2
# Identify the batch system flavour before the shared helpers are sourced.
joboption_lrms=SLURM

# ARC1 passes first the config file: "--config <path>" precedes the grami file.
if [ "$1" = "--config" ]; then
  shift
  ARC_CONFIG=$1
  shift
fi

# Resolve the absolute directory containing this script. Every expansion is
# quoted so that an installation path containing whitespace or glob
# characters is handled as a single word.
basedir=$(dirname -- "$0")
basedir=$(cd -- "$basedir" > /dev/null && pwd) || exit $?

pkgdatadir="$basedir"

# Pull in the helper functions shared between the LRMS submit scripts.
. "${pkgdatadir}/submit_common.sh" || exit $?

##############################################################
# Parse grami file, read arc config
##############################################################

# $1 is the grami file path (the optional --config pair was shifted away
# earlier). It is passed on exactly as received.
init $1
read_arc_conf

# Submission failures are appended to this per-job file.
failures_file="${joboption_controldir}/job.${joboption_gridid}.failed"

# Without a shared file system between frontend and nodes the job needs a
# node-local scratch directory; refuse to continue when neither is configured.
if [ -z "${RUNTIME_NODE_SEES_FRONTEND}" ] && [ -z "${RUNTIME_LOCAL_SCRATCH_DIR}" ] ; then
  printf '%s\n' "Need to know at which directory to run job: RUNTIME_LOCAL_SCRATCH_DIR must be set if RUNTIME_NODE_SEES_FRONTEND is empty" >&2
  printf '%s\n' "Submission: Configuration error." >> "$failures_file"
  exit 1
fi

##############################################################
# Zero stage of runtime environments
##############################################################
RTE_stage0

##############################################################
# create job script
##############################################################
mktempscript

is_cluster=true

#######################################
# Turn an arbitrary job name into one that is safe to hand to SLURM:
#   - prefix with "N" when the first character is not a letter,
#   - replace every non-alphanumeric character with "_",
#   - truncate to 15 characters.
# printf is used instead of echo so that names such as "-n" or "-e" are
# treated as data and not swallowed as echo options.
# Arguments: $1 - requested job name
# Outputs:   sanitised name on stdout
#######################################
slurm_sanitize_jobname () {
  printf '%s\n' "$1" | sed \
    -e 's/^\([^[:alpha:]]\)/N\1/' \
    -e 's/[^[:alnum:]]/_/g' \
    -e 's/\(...............\).*/\1/'
}

##############################################################
# Start job script
##############################################################
{
  echo "#!/bin/bash -l"
  echo "# SLURM batch job script built by grid-manager"
  # rerun is handled by GM, do not let SLURM requeue jobs itself.
  echo "#SBATCH --no-requeue"
  # write SLURM output to 'comment' file
  echo "#SBATCH -e ${joboption_directory}.comment"
  echo "#SBATCH -o ${joboption_directory}.comment"
  echo ""
} > "$LRMS_JOB_SCRIPT"

# choose queue
if [ -n "${joboption_queue}" ] ; then
  echo "#SBATCH -p $joboption_queue" >> "$LRMS_JOB_SCRIPT"
fi
# project name for accounting
if [ -n "${joboption_rsl_project}" ] ; then
  echo "#SBATCH -U $joboption_rsl_project" >> "$LRMS_JOB_SCRIPT"
fi
# job name for convenience
#TODO is this necessary? do parts of the infosys need these limitations?
if [ -n "${joboption_jobname}" ] ; then
  jobname=$(slurm_sanitize_jobname "$joboption_jobname")
else
  jobname="gridjob"
fi
echo "#SBATCH -J '$jobname'" >> "$LRMS_JOB_SCRIPT"
echo "SLURM jobname: $jobname" 1>&2
# Set up the user's environment on the compute node where the script
# is executed.
echo "#SBATCH --get-user-env=10L" >> "$LRMS_JOB_SCRIPT"

##############################################################
# (non-)parallel jobs
##############################################################
# A missing or non-positive task count means a single task.
if [ -z "$joboption_count" ] || [ "$joboption_count" -le 0 ] ; then
  joboption_count=1
fi

nodes_string="#SBATCH -n ${joboption_count}"

# Append every joboption_nodeproperty_<i> (i = 0, 1, ...) until the first
# index that is not set at all; a set-but-empty property is still appended.
# NOTE(review): the ":" separator looks inherited from the PBS script this
# file is based on - confirm sbatch accepts it after the task count.
i=0
while prop_name="joboption_nodeproperty_$i"; [ -n "${!prop_name+yes}" ] ; do
  var_value=${!prop_name}
  nodes_string="${nodes_string}:${var_value}"
  i=$(( i + 1 ))
done
echo "$nodes_string" >> $LRMS_JOB_SCRIPT

##############################################################
# Execution times (minutes)
##############################################################

#######################################
# Format a duration for "#SBATCH -t".
# Arguments: $1 - duration in seconds
# Outputs:   "<minutes>:<seconds>" on stdout (seconds are not zero padded)
#######################################
slurm_minsec () {
  echo "$(( $1 / 60 )):$(( $1 % 60 ))"
}

#######################################
# Derive a wall time from a per-task CPU time.
# Globals:   walltime_ratio (read) - multiplier; treated as 1 when unset or
#            empty so the arithmetic never sees a missing operand
# Arguments: $1 - CPU time in seconds
# Outputs:   wall time in seconds on stdout
#######################################
slurm_walltime_from_cputime () {
  echo "$(( $1 * ${walltime_ratio:-1} ))"
}

if [ -n "$joboption_cputime" ] ; then
  if [ "$joboption_cputime" -lt 0 ] ; then
    joboption_cputime=0
  fi
  # this is actually walltime deduced from cputime !
  maxcputime=$(( $joboption_cputime / $joboption_count ))
  echo "#SBATCH -t $(slurm_minsec "$maxcputime")" >> "$LRMS_JOB_SCRIPT"
fi

if [ -z "$joboption_walltime" ] && [ -n "$joboption_cputime" ] ; then
  # Set walltime for backward compatibility or incomplete requests
  joboption_walltime=$(slurm_walltime_from_cputime "$maxcputime")
fi

# When a CPU time was given as well, this writes a second "-t" line after
# the one above.
if [ -n "$joboption_walltime" ] ; then
  if [ "$joboption_walltime" -lt 0 ] ; then
    joboption_walltime=0
  fi
  maxwalltime="$joboption_walltime"
  echo "#SBATCH -t $(slurm_minsec "$maxwalltime")" >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Requested memory (mb)
##############################################################

set_req_mem

# Only emit a memory request when one is known after set_req_mem.
if [ -n "$joboption_memory" ] ; then
  echo "#SBATCH --mem ${joboption_memory}mb" >> $LRMS_JOB_SCRIPT
fi

# Blank separator followed by a fixed, restrictive umask for the job.
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'

# Overide umask of execution node (sometime values are really strange)
umask 077
EOSCR

##############################################################
# Add environment variables
##############################################################
add_user_env

##############################################################
# Check for existence of executable,
# there is no sense to check for executable if files are
# downloaded directly to computing node
##############################################################
# Here "check" only means that an executable name was given at all; on
# failure the temporary files are removed and the reason is recorded.
if [ -z "${joboption_arg_0}" ] ; then
  echo 'Executable is not specified' 1>&2
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  echo "Submission: Job description error.">>"$failures_file"
  exit 1
fi

#######################################################################
# copy information useful for transferring files to/from node directly
#######################################################################
case "$joboption_localtransfer" in
  yes) setup_local_transfer ;;
esac

######################################################################
# Adjust working directory for tweaky nodes
# RUNTIME_GRIDAREA_DIR should be defined by external means on nodes
######################################################################

#######################################
# Strip the session directory (and any slashes directly following it) from
# the front of a path. The prefix is matched literally, so characters such
# as ".", "*", "[" or "#" in the session directory cannot be misread as
# part of a pattern.
# Globals:   joboption_directory (read)
# Arguments: $1 - path to examine
# Outputs:   path relative to the session directory, or $1 unchanged when
#            it does not start with the session directory
#######################################
slurm_session_relpath () {
  local path=$1 rel
  case "$path" in
    "$joboption_directory"*)
      rel=${path#"$joboption_directory"}
      # drop the run of leading slashes left behind by the prefix removal
      rel=${rel#"${rel%%[!/]*}"}
      ;;
    *)
      rel=$path
      ;;
  esac
  printf '%s\n' "$rel"
}

#######################################
# Print the job script assignment for one standard stream.
# Globals:   RUNTIME_LOCAL_SCRATCH_DIR, joboption_gridid (read)
# Arguments: $1 - variable name to assign in the job script
#            $2 - stream path as requested
#            $3 - result of slurm_session_relpath for $2
# Outputs:   NAME="<path>" on stdout; the path is relocated under the
#            node scratch directory only when $3 differs from $2
#######################################
slurm_write_stream_var () {
  if [ "$3" = "$2" ] ; then
    printf '%s\n' "$1=\"$2\""
  else
    printf '%s\n' "$1=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$3\""
  fi
}

if [ -n "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  setup_runtime_env
else
  RUNTIME_STDIN_REL=$(slurm_session_relpath "${joboption_stdin}")
  RUNTIME_STDOUT_REL=$(slurm_session_relpath "${joboption_stdout}")
  RUNTIME_STDERR_REL=$(slurm_session_relpath "${joboption_stderr}")
  {
    echo "RUNTIME_JOB_DIR=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid"
    echo "RUNTIME_JOB_DIAG=$RUNTIME_LOCAL_SCRATCH_DIR/${joboption_gridid}.diag"
    echo "RUNTIME_GRIDAREA_DIR="
    slurm_write_stream_var RUNTIME_JOB_STDIN "${joboption_stdin}" "$RUNTIME_STDIN_REL"
    slurm_write_stream_var RUNTIME_JOB_STDOUT "${joboption_stdout}" "$RUNTIME_STDOUT_REL"
    slurm_write_stream_var RUNTIME_JOB_STDERR "${joboption_stderr}" "$RUNTIME_STDERR_REL"
  } >> "$LRMS_JOB_SCRIPT"
fi

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
#  Move files to local working directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

# RESULT is the job script's running status; the guards written below only
# proceed while it is still 0.
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'

RESULT=0

EOSCR


#####################################################
#  Download input files
####################################################
download_input_files

##############################################################
#  Skip execution if something already failed
##############################################################
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
if [ "$RESULT" = '0' ] ; then
EOSCR

##############################################################
#  Runtime configuration at computing node
##############################################################
RTE_stage1

##############################################################
#  Diagnostics
#  (followed by the second RESULT check guarding the execution itself)
##############################################################
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
echo "runtimeenvironments=$runtimeenvironments" >> "$RUNTIME_JOB_DIAG"
if [ ! "X$SLURM_NODEFILE" = 'X' ] ; then
  if [ -r "$SLURM_NODEFILE" ] ; then
    cat "$SLURM_NODEFILE" | sed 's/\(.*\)/nodename=\1/' >> "$RUNTIME_JOB_DIAG"
  else
    SLURM_NODEFILE=
  fi
fi
if [ "$RESULT" = '0' ] ; then
EOSCR

##############################################################
#  Execution
##############################################################
cd_and_run

##############################################################
#  End of RESULT checks
##############################################################
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
fi
fi
EOSCR

##############################################################
#  Runtime (post)configuration at computing node
##############################################################
configure_runtime

#####################################################
#  Upload output files
####################################################
if [ "$joboption_localtransfer" = 'yes' ] ; then
  upload_output_files
else
# There is no sense to keep trash till GM runs uploader
  echo 'if [ ! -z  "$RUNTIME_LOCAL_SCRATCH_DIR" ] ; then' >> $LRMS_JOB_SCRIPT
# Delete all files except listed in job.#.output.
# The generated code works in three steps: make every file user-writable,
# remove the write bit from everything that must be kept, then delete
# whatever is still writable.
  echo '  find ./ -type l -exec rm -f "{}" ";"' >> $LRMS_JOB_SCRIPT
  echo '  find ./ -type f -exec chmod u+w "{}" ";"' >> $LRMS_JOB_SCRIPT

  if [ -f "$joboption_controldir/job.$joboption_gridid.output" ] ; then
    # The sed removes leading slashes, if any.
    # Backslashes and spaces are escaped with a backslash in job.*.output. The
    # shell built-in read (deliberately used without -r) undoes this escaping.
    sed 's/^\/*//' "$joboption_controldir/job.$joboption_gridid.output" | \
    while read name rest; do

      # make it safe for shell by replacing single quotes with '\''
      name=`printf "%s" "$name"|sed "s/'/'\\\\\\''/g"`;

      # protect from deleting output files including those in the dynamic list
      if [ "${name#@}" != "$name" ]; then     # Does $name start with a @ ?

        dynlist=${name#@}
        echo "  dynlist='$dynlist'" >> $LRMS_JOB_SCRIPT
        cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
  chmod -R u-w "./$dynlist" 2>/dev/null
  cat "./$dynlist" | while read name rest; do
    chmod -R u-w "./$name" 2>/dev/null
  done
EOSCR
      else

        echo "  chmod -R u-w \"\$RUNTIME_JOB_DIR\"/'$name' 2>/dev/null" >> $LRMS_JOB_SCRIPT
      fi
    done
  fi

  # "-perm -200" selects files with the owner-write bit set. For a single
  # bit this is the same test as the old "-perm +200", which current GNU
  # find no longer accepts.
  echo '  find ./ -type f -perm -200 -exec rm -f "{}" ";"' >> $LRMS_JOB_SCRIPT
  echo '  find ./ -type f -exec chmod u+w "{}" ";"' >> $LRMS_JOB_SCRIPT
  echo 'fi' >> $LRMS_JOB_SCRIPT
fi
echo "" >> $LRMS_JOB_SCRIPT

##############################################################
#  Move files back to session directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
# !!!!!!!!!!!!!!!!!!! would be better to know the names of files !!!!!!!!!!!
##############################################################
move_files_to_frontend

#######################################
#  Submit the job
#######################################
echo "SLURM job script built" 1>&2
# Execute sbatch command
cd "$joboption_directory"
# Log the complete generated script to stderr.
{
  echo "SLURM script follows:"
  echo "-------------------------------------------------------------------"
  cat "$LRMS_JOB_SCRIPT"
  echo "-------------------------------------------------------------------"
  echo ""
} 1>&2

# Up to 10 attempts are made for ordinary failures. The three "queue is
# full" conditions sleep 60 seconds and reset the attempt counter.
SLURM_RESULT=1
SLURM_TRIES=0
while [ "$SLURM_TRIES" -lt '10' ] ; do

    # Unset all environment variables before calling sbatch. Otherwise
    # SLURM will forward them to the job and leak information about
    # the grid-manager.
    # TODO: Maybe we only should unset $ARC_*, $CONFIG_*, $GLOBUS_* etc?
  (
    for envname in $(env|grep -v "LRMS_JOB_SCRIPT"|cut -d= -f1) ; do
      unset $envname
    done
    ${sbatch} $LRMS_JOB_SCRIPT
  ) 1>$LRMS_JOB_OUT 2>$LRMS_JOB_ERR
  SLURM_RESULT="$?"
  [ "$SLURM_RESULT" -eq '0' ] && break

  # Exit code 198, or either of two known messages in sbatch's output.
  # The second message is a rare SLURM error, but may cause chaos in the
  # information/accounting system. The checks short-circuit in this order.
  if [ "$SLURM_RESULT" -eq '198' ] ||
     grep 'maximum number of jobs' "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" ||
     grep 'unable to accept job' "$LRMS_JOB_OUT" "$LRMS_JOB_ERR" ; then
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    SLURM_TRIES=0
    continue
  fi

  SLURM_TRIES=$(( SLURM_TRIES + 1 ))
  sleep 2
done
#######################################
# Numerically compare a SLURM version against a minimum.
# Arguments: $1 - output of "sbatch --version", e.g. "slurm 2.10.3"
#            $2 - minimum version as "MAJOR.MINOR.PATCH"
# Returns:   0 when $1 >= $2, 1 otherwise. Missing or non-numeric
#            components count as 0, so an unparsable version is "older".
#######################################
slurm_version_at_least () {
  local -a have want
  local idx h w
  # "${1##* }" keeps the text after the last space, dropping the "slurm "
  # prefix. "-" is also a separator so "2.3.0-pre1" yields 2 3 0.
  IFS='.-' read -r -a have <<< "${1##* }"
  IFS='.-' read -r -a want <<< "$2"
  for idx in 0 1 2 ; do
    h=${have[$idx]:-0}
    w=${want[$idx]:-0}
    # keep only the leading digits of each component
    h=${h%%[!0-9]*}
    w=${w%%[!0-9]*}
    # 10# forces base 10 so "08" is not parsed as (invalid) octal
    h=$(( 10#${h:-0} ))
    w=$(( 10#${w:-0} ))
    if [ "$h" -gt "$w" ] ; then return 0 ; fi
    if [ "$h" -lt "$w" ] ; then return 1 ; fi
  done
  return 0
}

if [ "$SLURM_RESULT" -eq '0' ] ; then

#TODO test what happens when the jobqueue is full or when the slurm ctld is not responding
   slurm_version=$(${sbatch} --version)
   # Only lines matching the "Submitted batch job <id>" message are
   # considered (sed -n ... p), and only the first such line is used, so
   # unrelated output cannot end up in the job id.
   if slurm_version_at_least "$slurm_version" "2.2.1" ; then
     # newer versions, sbatch would output to stdout
     job_id=$(sed -n 's/^.*Submitted batch job \([0-9][0-9]*\)$/\1/p' "$LRMS_JOB_OUT" | head -n 1)
   else
     # older versions, sbatch would output to stderr and the output is slightly different
     job_id=$(sed -n 's/^.*sbatch: Submitted batch job \([0-9][0-9]*\)$/\1/p' "$LRMS_JOB_ERR" | head -n 1)
   fi
   if [ "${job_id}" = "" ] ; then
      echo "job *NOT* submitted successfully!" 1>&2
      echo "failed getting the slurm jobid for the job!" 1>&2
      echo "Submission: Local submission client behaved unexpectedly.">>"$failures_file"
   else
      echo "joboption_jobid=$job_id" >> "$arg_file"
      echo "job submitted successfully!" 1>&2
      echo "local job id: $job_id" 1>&2
      # Remove temporary job script file
      rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
      echo "----- exiting submit_slurm_job -----" 1>&2
      echo "" 1>&2
      exit 0
   fi
else
  echo "job *NOT* submitted successfully!" 1>&2
  echo "got error code from sbatch: $SLURM_RESULT !" 1>&2
  echo "Submission: Local submission client failed.">>"$failures_file"
fi
# Failure path: dump whatever sbatch produced to stderr, remove the
# temporary files and exit with status 1. File names are quoted so that an
# empty variable makes cat fail instead of silently reading stdin.
echo "Output is:" 1>&2
cat "$LRMS_JOB_OUT" 1>&2
echo "Error output is:" 1>&2
cat "$LRMS_JOB_ERR" 1>&2
rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
echo "----- exiting submit_slurm_job -----" 1>&2
echo "" 1>&2
exit 1
