#!/bin/sh

# Script name, used as a prefix in log messages.
progname=$(basename "$0")

LRMS=Condor # for use in log messages

# ARC1 passes first the config file.
if [ "$1" = "--config" ]; then
    shift
    ARC_CONFIG=$1
    shift
fi

joboption_lrms="condor"
lrms_options="condor_requirements condor_rank condor_bin_path condor_config"
queue_options="condor_requirements"

# define paths and config parser
# ($0 and $basedir are quoted so that an installation path containing
# whitespace or glob characters still resolves to the right directory)
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?
. "${basedir}/lrms_common.sh"

# include common scan functions
. "${pkgdatadir}/scan_common.sh" || exit $?

# run common init
#  * parse config
#  * load LRMS-specific env
#  * set common variables
common_init

# Log system performance
if [ -n "$perflogdir" ]; then
   perflog_common "$perflogdir" "$CONFIG_controldir"
fi

#############################################################################
########################## LRMS specific functions ##########################
#############################################################################

#
# Should print the id's of all jobs in the LRMS, one per line. If left
# unimplemented then lrms_job_finished must be implemented. If it's
# implemented then implementing lrms_job_finished is optional. 
#
lrms_list_jobs() {
    # Generic placeholder: prints nothing and clears the flag that
    # scan_main checks before filtering jobs by the listed LRMS ids.
    # (A Condor-specific definition further down replaces this one.)
    LIST_IMPLEMENTED=""
}

#
# Should return 0 only if the job is not in the LRMS. The job's LRMS id is
# stored in the lrmsid variable. It's called for all grid jobs that are in
# INLRMS and CANCELING states and whose LRMS id was not listed by
# lrms_list_jobs. STDOUT and STDERR are redirected to job.$gridid.errors.
#
lrms_job_finished() {
    # Generic default: always succeed, i.e. report the job as no longer
    # present in the LRMS.
    true
}

#
# Should attempt to collect accounting info from LRMS for a job.  The job's
# LRMS id is stored in the lrmsid variable. This function will be called after
# the job has left the LRMS.  Diagnostics might not be available right after
# the job has finished and therefore a retry mechanism is implemented. If more
# time is needed, the function should signal this by returning without setting
# the LRMSExitcode variable. In this case it will be called again on the next
# run of scan-*-jobs, but not more than $maxwait times for any given job. If
# it sets LRMSExitcode, or $maxwait retries have already been done, then 
# lrms_last_call will be called shortly afterwards and the job declared done.
# STDOUT and STDERR are redirected to job.$gridid.errors. The interval between
# successive runs of scan-*-jobs is controlled by $wakeupperiod.
# Input variables:
#   * gridid
#   * lrmsid
#   * sessiondir
#   * uid -- numerical unix ID of the user owning the job
# The following variables are initialized with values read from
# $sessiondir.diag. All except exitcode are expected to be updated by this
# function:
#   * exitcode -- It's the exitcode of the user's executable, as reported by
#                 the job wrapper. Do not change.
#   * nodename -- may contain multiple lines, one execution node per line
#   * WallTime  -- in seconds
#   * UserTime  -- in seconds
#   * KernelTime  -- in seconds
#   * TotalMemory  -- in kB
#   * ResidentMemory -- in kB
#   * LRMSStartTime -- in Mds time format, UTC time zone (20091201140049Z)
#   * LRMSEndTime -- in Mds time format, UTC time zone (20091201140049Z)
# Output variables:
#   * LRMSExitcode -- as reported by the LRMS. It will be saved to the .diag file
#   * LRMSMessage -- any clues obtained from the LRMS about job failure. Its
#                    content will be added to .lrms_done in case LRMSExitcode is not 0.
#
lrms_get_accounting() {
    # Generic placeholder: flag accounting as not implemented and fall
    # back to the job wrapper's exit code (-1 when that is missing/empty).
    # (A Condor-specific definition further down replaces this one.)
    ACCT_IMPLEMENTED=""
    if [ -n "$exitcode" ]; then
        LRMSExitcode=$exitcode
    else
        LRMSExitcode=-1
    fi
}

#
# Called just before updating .diag and writing the .lrms_done file. STDOUT and
# STDERR are redirected to job.$gridid.errors. Can be left as is.
# Input/Output variables:
#   * the same as for lrms_get_accounting
#   * any variables set in lrms_get_accounting are visible here
#
lrms_last_call() {
    # Generic version (a Condor-specific definition further down replaces
    # this one).

    # Report a missing LRMS exit status.
    if [ -z "$LRMSExitcode" ]; then
        log "LRMS exit status not available for job $gridid ($LRMS id: $lrmsid)"
    fi

    # Without real accounting the LRMS exit code is not trustworthy:
    # drop whatever value is there.
    if [ -z "$ACCT_IMPLEMENTED" ]; then
        LRMSExitcode=""
    fi

    # Suspect killing due to resource limit exceeded only if exitcode is
    # missing or is > 128 (as in the case of a shell killed by a signal)
    if [ -z "$exitcode" ] || [ "$exitcode" -gt 128 ]; then
        read_grami
        autodetect_overlimit
    fi
}

#############################################################################

#
# scan-*-jobs has STDOUT redirected to /dev/null and STDERR redirected to
# job.helper..errors
#
log () {
    # Write a timestamped message, prefixed with the script name, to STDERR.
    echo "[$(date '+%Y-%m-%d %T')] $progname: $*" 1>&2
}

#
# Reads a line from STDIN and prints integer part on STDOUT.
# If not a valid number, prints nothing and returns 1
#
to_integer() {
    # The Perl program reads one line from STDIN:
    #   * empty line            -> prints nothing, exit status 0
    #   * "<digits>[.<digits>]" -> prints the integer part (0 if it is empty)
    #   * anything else         -> prints nothing, exit status 1
    /usr/bin/perl -we 'chomp(my $text = <>);
              if ( $text eq "" ) {
                  exit 0;
              }
              unless ( $text =~ m/^(\d*)(?:\.\d+)?$/ ) {
                  exit 1;
              }
              print $1 || 0;'
}


# Input variables
#   * gridid
# Output variables:
#   * ReqWallTime
#   * ReqCPUTime
#   * ReqTotalMemory
read_grami() {

    gramifile="job.$gridid.grami"

    if [ ! -f "$gramifile" ]; then
        log "grami file not found: $PWD/$gramifile"
        return 1
    fi

    # Raw values; when an option is assigned several times the last one wins.
    ReqTotalMemory=$(sed -n "s/^joboption_memory=//p" "$gramifile" | tail -n 1)
    ReqCPUTime=$(sed -n "s/^joboption_cputime=//p" "$gramifile" | tail -n 1)
    ReqWallTime=$(sed -n "s/^joboption_walltime=//p" "$gramifile" | tail -n 1)

    # Reduce to integers. to_integer prints nothing on failure, so a value
    # that is not a number ends up empty. The expansions are deliberately
    # left unquoted: word splitting trims surrounding whitespace first.
    if ! ReqWallTime=$(echo $ReqWallTime | to_integer); then
        log "joboption_walltime not a number"
    fi
    if ! ReqCPUTime=$(echo $ReqCPUTime | to_integer); then
        log "joboption_cputime not a number"
    fi
    if ! ReqTotalMemory=$(echo $ReqTotalMemory | to_integer); then
        log "joboption_memory not a number"
    fi

    # convert MB to KB
    if [ -n "$ReqTotalMemory" ]; then
        ReqTotalMemory=$(( $ReqTotalMemory * 1024 ))
    fi

    # Summary of whatever could be read.
    log "---- Requested resources specified in grami file ----"
    if [ -n "$ReqWallTime" ]; then
        log "Requested walltime: $ReqWallTime seconds"
    fi
    if [ -n "$ReqCPUTime" ]; then
        log "Requested cputime: $ReqCPUTime seconds"
    fi
    if [ -n "$ReqTotalMemory" ]; then
        log "Requested memory: $(( $ReqTotalMemory / 1024 )) MB"
    fi
    log "-----------------------------------------------------"
}

#
# Can be used from lrms_get_accounting() to guess whether the job was killed due to
# an exceeded resource limit and set LRMSMessage accordingly.
# Input variables
#   * gridid
#   * uid
#   * ReqWallTime
#   * ReqCPUTime
#   * ReqTotalMemory
#   * WallTime
#   * UserTime
#   * KernelTime
#   * TotalMemory
#   * ResidentMemory
#   * exitcode
#   * LRMSExitcode
#   * LRMSMessage
# Output variables:
#   * overlimit (if set, then one of memory cputime walltime )
#
autodetect_overlimit() {

    # round to integers
    wallt=$(echo $WallTime | to_integer) || log "WallTime not a number"
    usert=$(echo $UserTime | to_integer) || log "UserTime not a number"
    kernelt=$(echo $KernelTime | to_integer) || log "KernelTime not a number"
    totalmem=$(echo $TotalMemory | to_integer) || log "TotalMemory not a number"
    residentmem=$(echo $ResidentMemory | to_integer) || log "ResidentMemory not a number"

    # CPU time is user + kernel time; a missing component counts as 0.
    cput=$(( ${usert:-0} + ${kernelt:-0} ))

    # Each check may overwrite the result of the previous one, so when
    # several limits look exceeded the last matching check decides.
    if _usage_above_95_percent "$cput" "$ReqCPUTime"; then
        overlimit="cputime"
    fi
    if _usage_above_95_percent "$wallt" "$ReqWallTime"; then
        overlimit="walltime"
    fi
    if _usage_above_95_percent "$totalmem" "$ReqTotalMemory"; then
        overlimit="memory"
    fi
    if _usage_above_95_percent "$residentmem" "$ReqTotalMemory"; then
        overlimit="memory"
    fi

    # Non-zero status when no limit appears to have been hit.
    if [ -z "$overlimit" ]; then
        return 1
    fi
    log "Job have likely hit $overlimit limit"
}

# Private helper of autodetect_overlimit.
# Succeeds when $1 (measured usage) and $2 (requested limit) are both
# non-empty and greater than 0, and the integer percentage 100*$1/$2 is
# greater than 95.
_usage_above_95_percent() {
    [ -n "$1" ] && [ "$1" -gt 0 ] \
    && [ -n "$2" ] && [ "$2" -gt 0 ] \
    && [ $(( 100 * $1 / $2 )) -gt 95 ]
}

#
# Returns 0 for at most maxwait calls for any given gridid. Returns 1 on
# further calls or if an error has occurred.
#
job_canwait() {
    # Retry counter kept in job.$gridid.lrms_job (name left in $countfile).
    # Input variables:
    #   * gridid
    #   * maxwait
    # Returns 0 while the job may still wait for another scan, 1 once the
    # counter has reached $maxwait (the count file is then removed) or when
    # the counter cannot be read, parsed or written.
    if [ -z "$gridid" ] || [ -z "$maxwait" ]; then
        log "job_canwait requires the following to be set: gridid, maxwait"
        return 1
    fi

    countfile=job.$gridid.lrms_job

    # First call for this job: start counting at 1.
    if [ ! -f "$countfile" ]; then
        echo "1" > "$countfile" || { log "cannot write count file: $PWD/$countfile"; return 1; }
        return 0
    fi

    count=$(head -n 1 "$countfile") || { log "cannot read count file: $PWD/$countfile"; return 1; }

    # The counter must consist of digits only. (Checking merely for the
    # presence of a digit would let values such as "1x" through.)
    case "$count" in
        '')
            log "empty count file: $PWD/$countfile"
            return 1
            ;;
        *[!0-9]*)
            log "not an integer in count file: $PWD/$countfile"
            return 1
            ;;
    esac

    # Retries exhausted: forget the counter and tell the caller to stop waiting.
    [ "$count" -lt "$maxwait" ] || { rm -f "$countfile"; return 1; }

    echo "$(( $count + 1 ))" > "$countfile" || { log "cannot write count file: $PWD/$countfile"; return 1; }
    return 0
}

#
# Append .comment (containing STDOUT & STDERR of the job wrapper) to .errors
# Input variables:
#   * uid
#   * sessiondir

job_print_comment() {
    if [ -z "$uid" ] || [ -z "$sessiondir" ]; then
        log "job_print_comment requires the following to be set: uid, sessiondir"
        return 1
    fi

    commentfile=$sessiondir.comment

    # Print the file between two marker lines, running the commands through
    # do_as_uid with the job owner's uid. A missing file and a failing
    # do_as_uid are both reported with the same message.
    if [ -f "$commentfile" ] && do_as_uid "$uid" "
        echo '--------- Contents of output stream forwarded by $LRMS ------------'
        cat '$commentfile'
        echo '------------------------- End of output -------------------------'
    "; then
        return 0
    fi
    log "failed reading: $commentfile"
}

# In case overlimit is set, tweak what will go into .lrms_done
set_overlimit_message() {
    # Input variables:
    #   * overlimit -- empty, or one of: cputime walltime memory
    # Output variables (only when overlimit holds a known value):
    #   * LRMSMessage
    #   * LRMSExitcode -- set to 271
    # Returns 1 when overlimit is empty or holds an unknown value.
    #
    # A quoted case is used rather than [ $overlimit = ... ]: an unquoted
    # expansion is split into several words, so a value such as "! x" would
    # be parsed as a negated test and wrongly match the first branch.
    case "$overlimit" in
        '')
            return 1
            ;;
        cputime)
            LRMSMessage="job killed: cput"
            ;;
        walltime)
            LRMSMessage="job killed: wall"
            ;;
        memory)
            LRMSMessage="job killed: vmem"
            ;;
        *)
            log "invalid value overlimit=$overlimit"
            return 1
            ;;
    esac

    LRMSExitcode=271
}

#
# Input variables:
#   * gridid
#   * basedir
#   * exitcode
#   * LRMSExitcode
#   * LRMSMessage
#   * overlimit
#
job_write_donefile() {
    if [ -z "$gridid" ] || [ -z "$basedir" ] || [ -z "$LRMS" ]; then
        log "job_write_donefile requires the following to be set: gridid, basedir, LRMS"
        return 1
    fi

    # May replace LRMSMessage/LRMSExitcode when a resource limit was hit.
    set_overlimit_message

    if [ -n "$LRMSMessage" ] && [ "$LRMSExitcode" != 0 ]; then
        # The LRMS gave an explanation for a failure: use it verbatim.
        msg="$LRMSMessage"
    else
        case "$exitcode" in
            0)
                # The wrapper reports success; see whether the LRMS agrees.
                case "$LRMSExitcode" in
                    ''|0) msg= ;;
                    *)    msg="Job finished properly but $LRMS reported failure" ;;
                esac
                ;;
            *)
                # Missing or non-zero wrapper exit code: never report a
                # zero LRMS exit code for such a job.
                if [ "$LRMSExitcode" = 0 ]; then
                    LRMSExitcode=-1
                fi
                if [ -z "$exitcode" ]; then
                    msg="Job was killed by $LRMS"
                else
                    msg="Job failed with exit code $exitcode"
                fi
                ;;
        esac
    fi

    log "${msg:-$LRMS job $lrmsid finished normally}"

    # One line: "<exit code> <message>", with -1 standing in for a missing code.
    donefile=job.$gridid.lrms_done
    if ! echo "${LRMSExitcode:--1} $msg" > "$donefile"; then
        log "failed writing file: $PWD/$donefile"
    fi

    # wake up GM
    "${pkglibexecdir}/gm-kick" -j "$gridid" "job.$gridid.local"
}

#
# Should check that the job has exited lrms, and then do whatever post-processing is necessary.
# Called with STDOUT and STDERR redirected to the job.*.errors file.
# Input variables:
#   * gridid
#   * lrmsid
#   * uid
#
process_job() {
    # Steps:
    #   1. return (with lrms_job_finished's status) if the job is still in
    #      the LRMS
    #   2. read sessiondir from job.$gridid.local
    #   3. read .diag and ask the LRMS for accounting data
    #   4. either wait for the next scan (accounting not ready and retries
    #      left) or finalise: print comment, last call, write .diag and
    #      .lrms_done

    if [ -z "$gridid" ] || [ -z "$lrmsid" ] || [ -z "$uid" ] || [ -z "$LRMS" ]; then
        log "process_job requires the following to be set: gridid, lrmsid, uid, LRMS"
        return 1
    fi

    lrms_job_finished || return

    log "[$(date +%Y-%m-%d\ %T)] $LRMS job $lrmsid has exited"

    localfile=job.$gridid.local
    sessiondir=$(sed -n 's/^sessiondir=//p' "$localfile" | tail -n 1)
    if [ -z "$sessiondir" ]; then
        log "failed reading sessiondir from: $PWD/$localfile"
        return 1
    fi

    # move diag file that end-up in session directory after condor transfer_output (shared_filesystem = no)
    if [ -f "${sessiondir}/${sessiondir##*/}.diag" ]; then
        mv "${sessiondir}/${sessiondir##*/}.diag" "${sessiondir}.diag"
    fi

    job_read_diag
    lrms_get_accounting

    if [ -z "$LRMSExitcode" ] && job_canwait; then
        : # Come back again next time
    else
        # Remove the retry counter by its fixed name. $countfile is only set
        # when job_canwait ran in this very process, so relying on it would
        # leave a stale counter behind whenever accounting succeeds on a
        # later scan.
        rm -f "job.$gridid.lrms_job"
        job_print_comment
        lrms_last_call
        job_write_diag
        job_write_donefile
    fi
}


scan_init () {
    # Sets the scanner defaults. Exits with status 1 when basedir or LRMS
    # is not set.

    if [ -z "$basedir" ]; then
        log "basedir must be set"
        exit 1
    fi
    if [ -z "$LRMS" ]; then
        log "LRMS must be set"
        exit 1
    fi

    # Both optional hooks count as implemented here; the generic stubs
    # clear their flag when they are the ones being called.
    ACCT_IMPLEMENTED=yes
    LIST_IMPLEMENTED=yes

    # Retry budget for job_canwait, and the pause (in seconds) taken by the
    # trap below.
    wakeupperiod=60
    maxwait=5

    trap 'sleep $wakeupperiod' EXIT TERM

    export TMPDIR="${TMPDIR:-/tmp}"

    # default is shared sessiondirs
    case "$CONFIG_shared_filesystem" in
        '') CONFIG_shared_filesystem=yes ;;
        no) CONFIG_shared_filesystem= ;;
    esac
}


# Main loop of the scanner.
# Arguments: one or more control directories.
# Every directory is first checked to be enterable (the script exits with
# status 1 otherwise). Then, for each control directory:
#   * the LRMS is asked for the ids of the jobs it still knows about
#     (lrms_list_jobs)
#   * the grid jobs found by the Perl list script are filtered: when
#     LIST_IMPLEMENTED is set, jobs whose LRMS id was listed are dropped
#   * each remaining job without a .lrms_done file is handled by process_job
#     in a subshell, with STDOUT and STDERR appended to job.<gridid>.errors
# When perflogdir is set, the time spent in both phases is appended to
# $perflogfile.
# NOTE(review): perflogfile is not assigned anywhere in this file --
# presumably perflog_common or a sourced file provides it; confirm.
scan_main() {

    # Initial working directory
    myworkdir=$(pwd) || { log "pwd failed"; exit 1; }

    # Validate control directories supplied on command-line
    test -n "$1" || { log "control_dir not specified"; exit 1; }

    # First pass: only check that every control directory can be entered,
    # so that a bad argument aborts the run before any job is processed.
    for ctr_dir in "$@"; do
        cd "$myworkdir" || { log "cannot cd to $myworkdir"; exit 1; }
        cd "$ctr_dir" || { log "erroneous control dir: $ctr_dir"; exit 1; }
    done

    # Second pass: do the actual work. Returning to $myworkdir first keeps
    # relative control directory paths working.
    for ctr_dir in "$@"; do
        cd "$myworkdir" || { log "cannot cd to $myworkdir"; exit 1; }
        cd "$ctr_dir" || { log "erroneous control dir: $ctr_dir"; exit 1; }

        # This perl script scans the 'processing' sub-directory for grid jobs
        # in INLRMS and CANCELING states. If not running as the superuser, also
        # filter out any jobs not belonging to the current user.  Finally,
        # prints to STDOUT one line for each job containing:
        #   * grid ID, i.e. the \w+ part of the job.<id>.status file name
        #   * local ID, as in LRMS (the digits following 'localid=' in
        #     job.<id>.local)
        #   * uid of owner of the job.*.local file
        listscript='use English;
                    exit 1 unless opendir(DIR,"processing");
                    while (my $fname = readdir DIR) {
                        my ($gridid, $lrmsid, $status);
                        ($gridid) = ($fname =~ m/^job\.(\w+)\.status$/);
                        next unless defined $gridid;
                        next unless open(STATUS,"< processing/$fname");
                        $status = <STATUS>;
                        close STATUS;
                        next unless $status and $status =~ m/^INLRMS|CANCELING$/;
                        next unless open(LOCAL,"< job.$gridid.local");
                        my @stat = stat(LOCAL);
                        { local $/=undef; ($lrmsid) = (<LOCAL> =~ m/^localid=(\d+)/m) };
                        close LOCAL;
                        next unless $lrmsid;
                        next unless @stat;
                        next unless $EUID == 0 or $EUID == $stat[4];
                        print "$gridid $lrmsid $stat[4]\n";
                    }
                    closedir DIR;
        '

        # This perl script filters out from the output of the previous script
        # jobs whose lrms id is among the arguments passed to the script.
        filterscript='my $lrmsids = " @ARGV ";
                      while(my $line = <STDIN>) {
                          chomp $line;
                          my ($gridid,$lrmsid,$uid) = split / /, $line;
                          next if $lrmsids =~ m/\s$lrmsid\s/;
                          print "$gridid $lrmsid $uid\n";
                      }
        '
        # Performance logging: time the LRMS query.
        if [ ! -z "$perflogdir" ]; then
           start_ts=`date +%s.%N`
        fi

        # A failing LRMS query skips this control directory for this run.
        lrmsids=$(lrms_list_jobs) || { log "lrms_list_jobs failed"; continue; }

        if [ ! -z "$perflogdir" ]; then
           stop_ts=`date +%s.%N`
           t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"`
           echo "[`date +%Y-%m-%d\ %T`] scan-condor-job, condor_q: $t" >> $perflogfile
        fi

        # $lrmsids is left unquoted on purpose: each id becomes a separate
        # argument of the filter script.
        if [ -n "$LIST_IMPLEMENTED" ]; then
            filter_jobs() { /usr/bin/perl -we "$filterscript" $lrmsids; }
        else
            filter_jobs() { cat; } # no filtering
        fi

        # Performance logging: time the traversal and job processing.
        if [ ! -z "$perflogdir" ]; then
           start_ts=`date +%s.%N`
        fi

        /usr/bin/perl -we "$listscript" | filter_jobs | while read gridid lrmsid uid; do

            # log is redefined here without the timestamp prefix
            log () { echo "$progname: $*" 1>&2; }

            # Jobs that already have a .lrms_done file need no further work.
            donefile=job.$gridid.lrms_done
            [ -f "$donefile" ] && continue

            errorsfile=job.$gridid.errors
            [ -w "$errorsfile" ] || { log "cannot write to errors file at: $PWD/$errorsfile"; continue; }

            # run in separate process to make sure shell vars of one job
            # are not influencing other jobs
            ( process_job; ) >> "$errorsfile" 2>&1
        done
        if [ ! -z "$perflogdir" ]; then
           stop_ts=`date +%s.%N`
           t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"`
           echo "[`date +%Y-%m-%d\ %T`] scan-condor-job, ControlDirTraversalAndProcessing: $t" >> $perflogfile
        fi

    done
}


################################### Condor ####################################

lrms_list_jobs() {
    # Condor implementation: prints the cluster id of every job shown by
    # $CONDOR_BIN_PATH/condor_q, one per line. The path is read from the
    # environment of the Perl process. Prints nothing and succeeds when
    # condor_q reports that all queues are empty; fails when condor_q cannot
    # be started or returns a non-zero exit status.
    script='my $condor_q = "$ENV{CONDOR_BIN_PATH}/condor_q";
            open Q, "$condor_q|" or die "Failed running $condor_q : $!\n";
            my $listing = do { local $/; <Q> };
            close Q;
            exit 0 if $listing =~ m/All queues are empty/;
            die "Non-zero exit status returned by $condor_q\n" if $?;
            foreach my $id ($listing =~ m/^\s*(\d+)\.\d+\s+/mg) {
                print "$id\n";
            }
    '
    /usr/bin/perl -we "$script"
}


condor_read_history() {
    # Fetches the history ClassAd of the job and extracts accounting values.
    # Input variables:
    #   * lrmsid
    #   * CONDOR_BIN_PATH
    # Output variables:
    #   * histstring -- the ClassAd text
    #   * __RemoteHost __WallTime __KernelTime __UserTime __ImageSize
    #     __ExitCode ExitStatus JobStatus ExitSignal RemoveReason ExitReason
    #     JobCurrentStartDate EnteredCurrentStatus RequestCpus
    #   * PeriodicRemove -- TRUE when RemoveReason says that PeriodicRemove
    #     evaluated to TRUE, empty otherwise
    # Returns 1 when no history (at least two lines) could be found.

    # This Perl script reads and prints a per-job condor history file. We need to use a
    # hash rather than printing the file directly because some attributes appear multiple
    # times and we need to use the last occurrence.
    condorscript='use strict;
                  my %data;
                  if (-e $ARGV[0]) {
                      open(FILE, "<$ARGV[0]");
                      foreach my $line (<FILE>) {
                          if ($line =~ /([\w\+]+)\s=\s(.*)/) {
                              $data{$1} = $2;
                          }
                      }
                      foreach my $key (keys %data) {
                          print $key." = ".$data{$key}."\n";
                      }
                  }
    '

    # First try per-job history files (best performance)
    perjobhistorydir=$("$CONDOR_BIN_PATH/condor_config_val" PER_JOB_HISTORY_DIR)
    perjobhistory_exists=$?

    histstring=""
    if [ "$perjobhistory_exists" -eq 0 ]; then
        # per-job history files are being used, so we can immediately find the right file
        historyfile="$perjobhistorydir/history.$lrmsid.0"
        if [ -f "$historyfile" ]; then
            histstring=$( /usr/bin/perl -we "$condorscript" "$historyfile" )
        fi
    fi

    # If per-job history is not in place - use common history files (including rotated)
    historydir=$("$CONDOR_BIN_PATH/condor_config_val" HISTORY)
    if [ -z "$histstring" ] && [ -n "$historydir" ]; then
        # find the appropriate history file. The dot before the proc number
        # is escaped so that e.g. job 12 does not match "#1200.0"; only the
        # first matching file is used, because condor_history is given a
        # single -file argument. The trailing * is an intentional glob that
        # also picks up the rotated files.
        historyfile=$(grep -l -- "$(hostname -s).*#$lrmsid\.0" "$historydir"* | head -n 1)
        if [ -n "$historyfile" ]; then
            # try to get the full job classad
            { histstring=$( "$CONDOR_BIN_PATH/condor_history" -l -file "$historyfile" -match 1 "$lrmsid" ); } 2>&1
        fi
    fi

    # the awk expression checks that the input is more than 1 line long
    echo "$histstring" | awk 'END{if(NR<2){exit 1}}' || return 1

    # Extract information from condor_history output
    __RemoteHost=$(echo "$histstring" | sed -n 's/^LastRemoteHost *= *"\(.*\)"[^"]*$/\1/p')
    __WallTime=$(echo   "$histstring" | sed -n 's/^RemoteWallClockTime *= *\([0-9][0-9]*\).*/\1/p')
    __KernelTime=$(echo "$histstring" | sed -n 's/^RemoteSysCpu *= *\([0-9][0-9]*\).*/\1/p')
    __UserTime=$(echo   "$histstring" | sed -n 's/^RemoteUserCpu *= *\([0-9][0-9]*\).*/\1/p')
    __ImageSize=$(echo  "$histstring" | sed -n 's/^ImageSize *= *//p')
    __ExitCode=$(echo   "$histstring" | sed -n 's/^ExitCode *= *//p')
    ExitStatus=$(echo   "$histstring" | sed -n 's/^ExitStatus *= *//p')
    JobStatus=$(echo    "$histstring" | sed -n 's/^JobStatus *= *//p')
    ExitSignal=$(echo   "$histstring" | sed -n 's/^ExitSignal *= *//p')
    RemoveReason=$(echo "$histstring" | sed -n 's/^RemoveReason *= *"\(.*\)"[^"]*$/\1/p')
    ExitReason=$(echo   "$histstring" | sed -n 's/^ExitReason *= *"\(.*\)"[^"]*$/\1/p')
    JobCurrentStartDate=$(echo  "$histstring" | sed -n 's/^JobCurrentStartDate *= *\([0-9][0-9]*\).*/\1/p')
    EnteredCurrentStatus=$(echo "$histstring" | sed -n 's/^EnteredCurrentStatus *= *\([0-9][0-9]*\).*/\1/p')
    RequestCpus=$(echo  "$histstring" | sed -n 's/^RequestCpus *= *//p')

    # Start from a clean flag so that a value left over from elsewhere
    # cannot be mistaken for a result of this call.
    PeriodicRemove=
    if echo "$RemoveReason" | grep -q 'PeriodicRemove .*evaluated to \(TRUE\)'; then
        PeriodicRemove=TRUE
    fi

    return 0
}


seconds() {
    # Converts "<days> <hh>:<mm>:<ss>" (first occurrence in $1) to seconds
    # and prints the result without a trailing newline. Prints nothing when
    # $1 does not contain such a time.
    # The value is handed to Perl as an argument instead of being spliced
    # into the program text, so quotes, "$" or "@" in it can neither be
    # interpolated nor executed as Perl code. The "--" keeps a value that
    # starts with "-" from being taken as a Perl switch.
    /usr/bin/perl -e 'my $str = defined $ARGV[0] ? $ARGV[0] : "";
        exit unless $str =~ /(\d+) (\d\d):(\d\d):(\d\d)/;
        printf "%.0f", ( $1 * 24  + $2 ) * 3600 + $3 * 60 + $4;
    ' -- "$1"
}
find_in_file() {
    # $1 = file to search, $2 = basic regular expression with one \(...\) group.
    # Prints what that group matched on the last line of the file matching
    # the expression. The group is referred to as \2 because the sed
    # expression puts a catch-all group in front of it.
    file=$1
    regex=$2
    grep "$regex" "$file" \
        | tail -n 1 \
        | sed -n "s/\(.*\)$regex\(.*\)/\2/ip"
}


condor_read_log() {
    # Input variables:
    #   * gridid
    # Output variables:
    #   * condor_log -- path of the Condor log, taken from the grami file
    #   * _RemoteHost _UserTime _KernelTime _ImageSize _ExitCode
    # Returns 1 when the grami file or the Condor log cannot be used.

    # Find the Condor log.
    gramifile=job.$gridid.grami
    if [ ! -f "$gramifile" ]; then
        log "grami file not found: $PWD/$gramifile"
        return 1
    fi
    condor_log=$(sed -n 's/^condor_log=//p' "$gramifile" | tail -n 1)
    if [ -z "$condor_log" ]; then
        log "condor_log not set in grami file: $PWD/$gramifile"
        return 1
    fi
    log "condor log is at: $condor_log"
    if [ ! -r "$condor_log" ]; then
        log "Condor log file not readable: $condor_log"
        return 1
    fi

    # Parse condor log. Look for lines like:
    #    (return value 0)
    #    Image size of job updated: 692632
    #    Usr 0 00:37:09, Sys 0 00:00:04  -  Total Remote Usage
    #    Job executing on host: <129.240.86.70:32769>

    _ExitCode=$(find_in_file "$condor_log" '(return value \([0-9][0-9]*\))')
    _ImageSize=$(find_in_file "$condor_log" 'Image size of job updated: \([0-9][0-9]*\)')
    _RemoteHost=$(find_in_file "$condor_log" 'Job executing on host: *<\([^:>]*\)')

    # The CPU times are found as "<days> <hh>:<mm>:<ss>" and converted to
    # seconds right away.
    _UserTime=$(seconds "$(find_in_file "$condor_log" 'Usr \([0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\).*Total Remote Usage')")
    _KernelTime=$(seconds "$(find_in_file "$condor_log" 'Sys \([0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9]\).*Total Remote Usage')")
}


lrms_get_accounting() {
    # Condor implementation: accounting data comes from the job history.
    # While the history is not available LRMSExitcode is left untouched and
    # 1 is returned.
    if ! condor_read_history; then
        log "Job has exited but is not yet listed by condor_history"
        return 1
    fi

    # set LRMSExitcode to signal that no more tries are necessary
    LRMSExitcode=-1
}


lrms_last_call() {
    # Condor implementation. Merges what is known about the job from the
    # Condor log and from condor_history into the accounting variables,
    # prints all of it to STDOUT and derives LRMSExitcode, LRMSMessage and
    # exitcode.
    # Input variables:
    #   * LRMSExitcode -- non-empty when condor_read_history succeeded
    #   * the variables set by condor_read_history
    #   * gridid, lrmsid, progname

    if condor_read_log; then

        # override values read from .diag with those from condor log
        nodename=${_RemoteHost:-$nodename}
        UserTime=${_UserTime:-$UserTime}
        KernelTime=${_KernelTime:-$KernelTime}
        TotalMemory=${_ImageSize:-$TotalMemory}

        echo "$progname: ----- begin condor log ($condor_log) -----"
        cat "$condor_log"
        echo "$progname: ----- end condor log ($condor_log) -----"

        echo "$progname: ----- Information extracted from Condor log -----"
        [ -n "$_RemoteHost" ] && echo "$progname: RemoteHost=$_RemoteHost"
        [ -n "$_UserTime" ] && echo "$progname: UserTime=$_UserTime"
        [ -n "$_KernelTime" ] && echo "$progname: KernelTime=$_KernelTime"
        [ -n "$_ImageSize" ] && echo "$progname: ImageSize=$_ImageSize"
        [ -n "$_ExitCode" ] && echo "$progname: ExitCode=$_ExitCode"
        echo "$progname: -------------------------------------------------"
    fi

    if [ -z "$LRMSExitcode" ]; then
        # log already prefixes every message with $progname
        log "No condor_history for Condor ID $lrmsid"
    else

        # override with values from condor_history
        nodename=${__RemoteHost:-$nodename}
        WallTime=${__WallTime:-$WallTime}
        UserTime=${__UserTime:-$UserTime}
        KernelTime=${__KernelTime:-$KernelTime}
        TotalMemory=${__ImageSize:-$TotalMemory}

        echo "$progname: ----- begin condor history message -----"
        echo "$histstring"
        echo "$progname: ----- end condor history message -----"

        echo "$progname: ----- Information extracted from condor_history -----"
        [ -n "$__RemoteHost" ] && echo "$progname: LastRemoteHost=$__RemoteHost"
        [ -n "$__WallTime" ] && echo "$progname: RemoteWallClockTime=$__WallTime"
        [ -n "$__UserTime" ] && echo "$progname: RemoteUserCpu=$__UserTime"
        [ -n "$__KernelTime" ] && echo "$progname: RemoteSysCpu=$__KernelTime"
        [ -n "$__ImageSize" ] && echo "$progname: ImageSize=$__ImageSize"
        [ -n "$__ExitCode" ] && echo "$progname: ExitCode=$__ExitCode"
        [ -n "$ExitStatus" ] && echo "$progname: ExitStatus=$ExitStatus"
        [ -n "$JobStatus" ] && echo "$progname: JobStatus=$JobStatus"
        [ -n "$ExitSignal" ] && echo "$progname: ExitSignal=$ExitSignal"
        [ -n "$RemoveReason" ] && echo "$progname: RemoveReason=$RemoveReason"
        [ -n "$JobCurrentStartDate" ] && echo "$progname: JobCurrentStartDate=$JobCurrentStartDate"
        [ -n "$EnteredCurrentStatus" ] && echo "$progname: EnteredCurrentStatus=$EnteredCurrentStatus"
        [ -n "$ExitReason" ] && echo "$progname: ExitReason=$ExitReason"
        [ -n "$RequestCpus" ] && echo "$progname: RequestCpus=$RequestCpus"
        echo "$progname: -----------------------------------------------------"

        # Start and end times are converted with the date helpers, which
        # hand back their results in return_date_seconds / return_mds_date.
        if [ -n "$JobCurrentStartDate" ]; then
            date_seconds_to_utc "$JobCurrentStartDate"
            seconds_to_mds_date "$return_date_seconds"
            LRMSStartTime=$return_mds_date
            echo "$progname: LRMSStartTime=$LRMSStartTime"
        fi
        if [ -n "$EnteredCurrentStatus" ]; then
            date_seconds_to_utc "$EnteredCurrentStatus"
            seconds_to_mds_date "$return_date_seconds"
            LRMSEndTime=$return_mds_date
            echo "$progname: LRMSEndTime=$LRMSEndTime"
        fi
    fi

    # The exit code from condor_history takes precedence over the one from
    # the condor log.
    LRMSExitcode=${__ExitCode:-$_ExitCode}
    [ -n "$LRMSExitcode" ] || log "ExitCode not found in condor log and condor_history"

    # set message in case condor killed the job. LRMSExitcode should not be 0.
    if [ -n "$PeriodicRemove" ]; then
        [ "$LRMSExitcode" = 0 ] && LRMSExitcode=
        LRMSMessage="PeriodicRemove evaluated to TRUE"
    elif [ -n "$RemoveReason" ] && [ "$RemoveReason" != "None" ]; then
        [ "$LRMSExitcode" = 0 ] && LRMSExitcode=
        LRMSMessage="RemoveReason: $RemoveReason"
    elif [ -n "$ExitReason" ] && [ "$ExitReason" != "None" ]; then
        [ "$LRMSExitcode" = 0 ] && LRMSExitcode=
        LRMSMessage="ExitReason: $ExitReason"
    fi

    # Check whether the job was killed by Condor. If yes, check for exceeded resources limits
    if { [ -n "$RemoveReason" ] && [ "$RemoveReason" != "None" ]; } || [ -n "$PeriodicRemove" ]; then
        read_grami
        autodetect_overlimit
    fi

    # Condor does not write a .diag file.
    exitcode=$LRMSExitcode
}

# Entry point: set the scanner defaults, then process every control
# directory given on the command line.
scan_init
scan_main "$@"

# The EXIT trap installed by scan_init sleeps for $wakeupperiod seconds
# before the process actually ends.
exit 0