#!/bin/bash

#######################################
# Stop following the job log, finalise it and remove the temporary files.
# Globals (read): QCMD_TPID, QCMD_LOG, LOGPATH, QCMD_SCRIPT, QCMD_IDFILE
# Arguments:
#   $1 - status message appended to the log as a "QCMD: ..." trailer
# Outputs:
#   Prints the destination path to stdout when the log is kept.
#######################################
handle_logs () {
    # QCMD_TPID is only set once the background log follower has been started
    if [[ -n $QCMD_TPID ]]; then
        kill "$QCMD_TPID"
    fi

    if [[ -f $QCMD_LOG ]]; then
        if [[ -n $LOGPATH ]]; then
            printf '\n\nWriting output to specified log file at:\n'
            printf '    %s\n' "$LOGPATH"
            # printf '%s' keeps the message literal (no backslash interpretation)
            printf '\nQCMD: %s\n' "$1" >> "$QCMD_LOG"
            mv -- "$QCMD_LOG" "$LOGPATH"
        else
            # No log destination requested: the captured output is discarded
            rm -f -- "$QCMD_LOG"
        fi
    fi

    # -f so a file that is already gone does not produce an error
    rm -f -- "$QCMD_SCRIPT" "$QCMD_IDFILE"
}

#######################################
# One polling step: confirm the background submission process is still alive,
# then pause for a second. If the process is gone, report the failure, remove
# the temporary script and job-id files, and exit the script with status 1.
# Globals (read): QCMD_SPID, QCMD_SCRIPT, QCMD_IDFILE
# Arguments: none
#######################################
check_job () {
    local leftover

    # kill -0 sends no signal; it only tests whether the PID can be signalled
    if ! kill -0 "$QCMD_SPID" > /dev/null 2>&1; then
        >&2 echo "Error: PBS could not successfully start job. Exiting..."

        # Do not leave temp files behind on this exit path
        for leftover in "$QCMD_SCRIPT" "$QCMD_IDFILE"; do
            if [[ -n $leftover ]]; then
                rm -f -- "$leftover"
            fi
        done

        exit 1
    fi

    sleep 1
}

#######################################
# Signal handler: abort the run and always exit with status 1.
# If the submission process was started, it is killed and handle_logs
# finalises the log and temp files; otherwise only the temp files are removed.
# Globals (read): QCMD_SPID, QCMD_JOB, QCMD_SCRIPT, QCMD_IDFILE, QCMD_PROCFILE
# Arguments: none
#######################################
clean_up () {
    local leftover

    if [[ -n $QCMD_SPID ]]; then
        kill "$QCMD_SPID"
        >&2 echo "qcmd job terminated $QCMD_JOB"
        handle_logs "Job terminated before completion"
    else
        # Any of these may still be unset if the signal arrived early, so
        # empty names are skipped; quoting keeps paths with spaces intact.
        for leftover in "$QCMD_SCRIPT" "$QCMD_IDFILE" "$QCMD_PROCFILE"; do
            if [[ -n $leftover ]]; then
                rm -f -- "$leftover"
            fi
        done
    fi

    exit 1
}

# Throttle and then check for too many on node
# Pipeline: pgrep lists the PIDs of this user's processes matching "qcmd";
# ps re-prints those PIDs ordered by its "etime" sort key; head keeps the
# first five; the trailing xargs joins them onto one space-separated line.
# NOTE(review): an ascending elapsed-time sort would list the newest
# processes first — confirm this selects the intended five.
QCMD_ALLOW=$(pgrep -u $USER qcmd | xargs ps --no-headers -o pid --sort=etime -p | head -n 5 | xargs)

# Refuse to run unless this shell's own PID ($$) is in the allowed list. The
# surrounding spaces make the comparison match whole PIDs only.
if [[ " $QCMD_ALLOW " != *" $$ "* ]]; then
    >&2 echo "Error: already at max qcmd job count (5)"
    exit 1
fi

# From here on, hangup/interrupt/quit/terminate signals run clean_up
trap clean_up SIGHUP SIGINT SIGQUIT SIGTERM

# Here are the default settings at the moment
# These are interpolated into the default sbatch argument string further down.
NODES=1
CPUS=8
#MPIPROCS=1
QUEUE=build
# NOTE(review): this value is passed to sbatch --time, which reads a bare
# integer as minutes — confirm 3600 is not meant to be seconds.
WALLTIME=3600

#######################################
# Print the qcmd help text to stdout, then terminate the script.
# Globals (read): PBS_ACCOUNT (shown in the help text)
# Arguments:
#   $1 - status passed to exit
#######################################
usage () {
    local status=$1

    cat <<EOF
Usage: qcmd [Slurm options] -- executable [arguments]

This script will run a command/executable on the build queue using one node
by default. Any arguments submitted to the script will be forwarded to the qsub
command, enabling you to override script defaults (e.g., walltime). Note that
commands run using qcmd will *not* have interactive terminal support. Please
use qinteractive or qsub -I for interactive jobs. Jobs created by qcmd will be
provided with a login node environment, enabling activities like compilation.

You have three ways of selecting an account to use for the submission:
FIXME FIXME

1. Use the PBS -A [account] command line argument
2. Set the PBS_ACCOUNT environment variable
3. Let qcmd randomly select from your active project list

Current PBS_ACCOUNT value = $PBS_ACCOUNT
EOF

    # Intentionally unquoted: with no argument this is a bare "exit"
    exit $status
}

# Parse command line arguments
#######################################
# Split the qcmd command line at the first "-- " separator: the text before
# it is the scheduler option string, the text after it is the command to run.
# Globals (written): ARGV, PBSOPT, EXECMD, READVAL, LOGPATH (only when a
#                    -o option is present)
# Arguments: the script's positional parameters
# Exits through usage: status 0 on --help, status 1 when no command is given.
#######################################
parse_args () {
    local n

    # The arguments are deliberately flattened into a single string; the
    # command part is later written as-is into the job script.
    ARGV="$*"
    PBSOPT="${ARGV%%-- *}"
    # Strip only through the FIRST "-- " so a command that contains its own
    # "-- " (e.g. "git checkout -- file") is kept whole.
    EXECMD="${ARGV#*-- }"
    # Start clean so an inherited READVAL cannot capture the first word
    READVAL=

    # Word splitting of PBSOPT is intended here; pathname expansion is not.
    set -f
    for n in $PBSOPT; do
        case $n in
            --help)
                usage 0
                ;;
            -o*)
                if [[ $n == -o ]]; then
                    # Value arrives as the next word
                    READVAL=logpath
                else
                    # Value is attached: -o<path>
                    LOGPATH=${n#-o}
                fi
                ;;
            -e*)
                >&2 echo "Note: separate output and error log files are not supported by qcmd"
                ;;
            *)
                case $READVAL in
                    logpath)
                        LOGPATH=$n
                    ;;
                esac
                READVAL=
                ;;
        esac
    done
    set +f

    if [[ $ARGV != *"-- "* ]]; then
        >&2 echo "Error: no command specified!"
        usage 1
    fi
}

parse_args "$@"


# Create default list of arguments
# Any PBSARGS value already present in the environment is appended after the
# defaults.
# NOTE(review): --ntasks is fixed at 8 regardless of NODES/CPUS — confirm.
PBSARGS="-N ${NODES} --ntasks-per-node ${CPUS} --ntasks 8 --time ${WALLTIME} -p ${QUEUE} ${PBSARGS}"

# printf '%s' shows the command exactly as given; echo -e would rewrite any
# backslash sequences it contains.
printf 'Submitting command to Slurm:\n'
printf '    %s\n\n' "$EXECMD"

# Put any user requests second to override defaults
if [[ -n $PBSOPT ]]; then
    printf 'Requested custom PBS options:\n    %s\n\n' "$PBSOPT"

    PBSARGS="$PBSARGS $PBSOPT"
fi

# If running in a PBS job, use original shell; otherwise use the login shell
JOBSHELL=${PBS_O_SHELL:-$SHELL}

# Submit job request using temp script and write output to home directory
mkdir -p -- "$HOME/.qcmd"
QCMD_SCRIPT=$HOME/.qcmd/script.$$
QCMD_IDFILE=$HOME/.qcmd/jobid.$$
QCMD_LOG=$HOME/.qcmd/out.$$
cat > "$QCMD_SCRIPT" << EOF
#!$JOBSHELL
$EXECMD
EOF
chmod +x -- "$QCMD_SCRIPT"

# Split the option string into words on whitespace without pathname
# expansion. With -d '' read consumes the whole string and returns non-zero
# at end of input, which is expected and ignored here.
IFS=$' \t\n' read -r -d '' -a QCMD_SBATCH_ARGS <<< "$PBSARGS"

# Submit job in background and then get job ID
sbatch "${QCMD_SBATCH_ARGS[@]}" -o "$QCMD_LOG" --wait --job-name qcmd "$QCMD_SCRIPT" > "$QCMD_IDFILE" &
QCMD_SPID=$!

# Poll until sbatch has written something to the ID file; check_job aborts
# the script if the sbatch process disappears first.
while [[ ! -s $QCMD_IDFILE ]]; do
    check_job
done

# The job ID is taken from the fourth field of sbatch's output
QCMD_JOB=$(awk '{print $4}' "$QCMD_IDFILE")

# Check that job ID was produced
if [[ ! $QCMD_JOB =~ ^[0-9]+ ]]; then
    printf '%s\n' "$QCMD_JOB"
    >&2 echo "Error: error in qsub submission. Exiting ... ${QCMD_JOB} | ${QCMD_IDFILE}"
    # Actually stop here, as the message says: clean_up kills the submission
    # process, removes the temp files and exits with status 1.
    clean_up
fi

printf 'Waiting for job %s to start ... \n\n\n' "$QCMD_JOB"

# The log file appearing is used as the signal that output is available
while [[ ! -f $QCMD_LOG ]]; do
    check_job
done

# Follow job output
tail -n +1 -f "$QCMD_LOG" &
QCMD_TPID=$!

# Wait for the job to finish and give a little time to empty buffer
wait "$QCMD_SPID"
JOBCODE=$?
sleep 5

# Clean up
handle_logs "Job exited normally"
exit "$JOBCODE"
