#! /usr/bin/env perl
##**************************************************************
##
## Copyright (C) 1990-2007, Condor Team, Computer Sciences Department,
## University of Wisconsin-Madison, WI.
## 
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License.  You may
## obtain a copy of the License at
## 
##    http://www.apache.org/licenses/LICENSE-2.0
## 
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
##**************************************************************

use warnings;
use strict;
use Getopt::Long;
use Date::Manip;
use File::Path;
use FindBin qw($RealBin $RealScript);

# contains parsed arguments from ARGV
my %args;
my @original_argv;

sub usage
{
	print<<EOF;
Usage: $0 --jobid <CLUSTER.PROC> [ OPTIONS ]
--jobid      The jobid
--scratch    Scratch directory used by the program, defaults to /tmp/cbd-<PID>

*** The options below are not fully implemented. ***
--sdate      A Unix date corresponding to the start of the search time
--edate      A Unix date corresponding to the end of the search time
--pool       The pool to query (local pool if not defined)
--schedd     The schedd to query (local schedd if not defined)
EOF

	exit(1);
}

sub parse_command_line
{
	my $rc;
	my $tmp;

	@original_argv = @ARGV;

	$rc = GetOptions( \%args,
			'help',
			'jobid=s',
			'sdate=s',
			'edate=s',
			'pool=s',
			'schedd=s'
			);
	
	if (!$rc) {
		usage();
	}

	if (defined($args{'help'})) {
		usage();
	}


	if (!defined($args{'jobid'})) {
            my $file = "condor-profile.txt";
            acquire_identity($file);
            print "Information is in $file\n";
            exit 0;
	}

	# ######
	# Reparse the start time into a Date::Manip form.
	# ######
	if (exists($args{'sdate'})) {
		if ($args{'sdate'} =~ m/^\d+$/) {
			$args{'sdate'} = ParseDateString("epoch $args{'sdate'}");
		} else {
			$args{'sdate'} = ParseDate($args{'sdate'});
		}
	} else {
		# start of epoch seconds
		$args{'sdate'} = ParseDate("Jan 1, 1970  00:00:00 GMT");
	}

	# ######
	# Reparse the end time into a Date::Manip form.
	# ######
	if (exists($args{'edate'})) {
		if ($args{'edate'} =~ m/^\d+$/) {
			$args{'edate'} = ParseDateString("epoch $args{'edate'}");
		} else {
			$args{'edate'} = ParseDate($args{'edate'});
		}
	} else {
		# end of epoch seconds-ish
		$args{'edate'} = ParseDate("Jan 19, 2038  00:00:00 GMT");
	}

	# ######
	# set up the scratch space
	# ######

	# This stupid date format is because tar does insanely stupid things when
	# there is a colon in the archive name.
	$tmp = `date +%F-%I_%M_%S%p-%Z`;
	chomp $tmp;
	$tmp = getlogin() . "-jid$args{'jobid'}-$tmp";

	# The _scratch for is the parent of the inner scratch directory.
	if (!exists($args{'scratch'})) {
		$args{'scratch'} = "/tmp/cgi-$$/cgi-$tmp";
		$args{'_scratch'} = "/tmp/cgi-$$";
	} else {
		$args{'scratch'} .= "/cgi-$$/cgi-$tmp";
		$args{'_scratch'} = "$args{'scratch'}/cgi-$$";
	}

	if (-d $args{'_scratch'}) {
		die "I'm not going to remove /. Sorry." 
			if ($args{'_scratch'} =~ /^\d*\/\d*$/g);
		rmtree($args{'_scratch'}, {'keep_root' => 1});
	}

	# make the scratch space.
	mkpath($args{'scratch'});

	# fix my self up to clean it up on INT
	$SIG{'INT'} = 'sigint';
}

# XXX fixme.
sub cleanup
{
	# remove the scratch dir, sanity check too
	if (-d $args{'_scratch'}) {
		die "I'm not going to remove /. Sorry." 
			if ($args{'_scratch'} =~ /^\d*\/\d*$/g);
		# also remove the scratch dir itself...
		rmtree($args{'_scratch'});
	}
}

sub sigint
{
	print "Got INT. Quitting.\n";
	cleanup();
	exit(1);
}

# Return the start date in epoch seconds from the arguments. If you pass
# in a single argument of a format string, the date will be returned as that
# instead.
sub sdate
{
	my $cmd = shift;
	if (!defined($cmd)) {
		$cmd = "%s";
	}
	return UnixDate($args{'sdate'}, $cmd);
}

# Return the end date in epoch seconds from the arguments. If you pass
# in a single argument of a format string, the date will be returned as that
# instead.
sub edate
{
	my $cmd = shift;
	if (!defined($cmd)) {
		$cmd = "%s";
	}
	return UnixDate($args{'edate'}, $cmd);
}

sub condor_history
{
	my $jobid = shift; # Must exist
	my $output_file = shift; # Fully qualified when passed in...
	my @cqargs = @_;
	my $cmd;
	die "No jobid for which to condor_history!" if !defined($jobid);

	$cmd = "condor_history " . join(" ", @cqargs) . " $jobid " . 
			"> $output_file 2>&1";

	return system("$cmd");
}

sub condor_q
{
	my $jobid = shift; # Must exist
	my $output_file = shift; # Fully qualified when passed in...
	my @cqargs = @_;
	my $cmd;
	die "No jobid for which to condor_q!" if !defined($jobid);

	# XXX deal with --pool and --schedd
	$cmd = "condor_q " . join(" ", @cqargs) . " $jobid " . 
			"> $output_file 2>&1";

	return system("$cmd");
}

sub acquire_identity
{
	my $output_file = shift; # Fully qualified when passed in...

	open(FOUT, '>', $output_file) || 
		die "acquire_identity: Can't open identity file '$output_file': $!";
	
	print "Gathering Condor and machine information...\n";

	print FOUT<<EOF;
################################
# User who created this report #
################################
 ${\(getlogin() . "\n")}
####################################################
# Script invocation and on what machine was it run #
####################################################
 Script: $RealBin/$RealScript @original_argv
 Machine: @{[`/bin/hostname 2>&1`]}
###########################
# Date of Report Creation #
###########################
 @{[`/bin/date 2>&1`]}
############
# uname -a #
############
 @{[`/bin/uname -a 2>&1`]}
##############
# /etc/issue #
##############
 @{[`/bin/cat /etc/issue 2>&1`]}
#######################
# /etc/redhat-release #
#######################
 @{[`/bin/cat /etc/redhat-release 2>&1`]}
#######################
# /etc/debian_version #
#######################
 @{[`/bin/cat /etc/debian_version 2>&1`]}
##############
# /etc/hosts #
##############
 @{[`/bin/cat /etc/hosts 2>&1`]}
######################
# /etc/nsswitch.conf #
######################
 @{[`/bin/cat /etc/nsswitch.conf 2>&1`]}
#############
# ulimit -a #
#############
 @{[`/bin/sh -c "ulimit -a 2>&1"`]}
###########################
# Interface configuration #
###########################
 @{[`/sbin/ifconfig 2>&1`]}
#####################
# ps auxww --forest #
#####################
@{[`ps auxww --forest 2>&1`]}
#########
# df -h #
#########
@{[`df -h 2>&1`]}
############
# iptables #
############
@{[`/sbin/iptables -L 2>&1`]}
###########################
# Condor log dir contents #
###########################
@{[`ls \`condor_config_val LOG\``]}
##################
# Condor Version #
##################
 @{[`condor_version 2>&1`]}
###################################
# Location of Condor Config Files #
###################################
 @{[`condor_config_val -config 2>&1`]}
###########################
# Condor Config Variables #
###########################
 @{[`condor_config_val -v -dump 2>&1`]}
#####################
# ldd condor_schedd #
#####################
 @{[`ldd \`condor_config_val SBIN\`/condor_schedd`]}
#####################
# uptime #
#####################
 @{[`uptime`]}
#####################
# free #
#####################
 @{[`free`]}
EOF

	close(FOUT);
}

sub acquire_job_q
{
	my $jobid = shift; # Must exist
	my $output_file = shift; # Fully qualified when passed in...
	my @cqargs = @_;
	die "acquire_job_q: Error. No jobid!" if !defined($jobid);

	print "acquire_job_q: Getting job q for job $jobid\n";
	return condor_q($jobid, $output_file, @cqargs);
}

sub acquire_job_ad
{
	my $jobid = shift; # Must exist
	my $output_file = shift; # Fully qualified when passed in...
	my @cqargs = @_;
	die "acquire_job_ad: Error. No jobid!" if !defined($jobid);

	print "acquire_job_ad: Getting job ad for job $jobid\n";
	return condor_q($jobid, $output_file, @cqargs);
}

sub acquire_job_analysis
{
	my $jobid = shift; # Must exist
	my $output_file = shift; # Fully qualified when passed in...
	my @cqargs = @_;

	die "acquire_job_analysis: Error. No jobid!" if !defined($jobid);

	print "acquire_job_analysis: Getting job analysis for job $jobid\n";
	return condor_q($jobid, $output_file, @cqargs);
}

sub acquire_job_userlog_lines
{
	my $jobid = shift; # Must exist
	my $output_file = shift; # Fully qualified when passed in...
	my @cqargs = @_;
	my $tmpfile = "$output_file.query";
	my @lines;
	my ($junk, $ulogfile);
	my ($cluster, $proc);
	my $line;

	die "acquire_job_userlog_lines: Error. No jobid!" if !defined($jobid);

	print "acquire_job_userlog_lines: Getting job log entries for job $jobid\n";

	# #######
	# get the path of the user log file
	# #######
	condor_q($jobid, $tmpfile, '-format "logfile: %s\n"', 'UserLog');

	# #######
	# bail if no logfile or otherwise empty
	# #######
	if (-z $tmpfile) {
		system("echo No log file specified in job ad. > $output_file 2>&1");
		print "Trying condor_history instead\n";
		condor_history($jobid, $tmpfile, '-format "logfile: %s\n"', 'UserLog');
		if (-z $tmpfile) {
			print "Not in condor_history either, giving up on looking for logfile in all the wrong places\n";
			rmtree("$tmpfile");
			return;
		}
	}
	
	# #######
	# see if the log file is well formed.
	# #######
	@lines = `grep "logfile:" $tmpfile`;
	if (scalar(@lines) > 1) {
		system("echo Unexpected condor_q problem. See query file. " .
					"> $output_file 2>&1");
		return;
	}

	($junk, $ulogfile) = ($lines[0] =~ m/(^logfile: )(.*)/);

print "ulog file is $ulogfile\n";

	if ($junk !~ /logfile: / || !defined($ulogfile)) {
		system("echo Unexpected userlog path problem. See query file. " .
					"> $output_file 2>&1");
		return;
	}

	if (! -f $ulogfile) {
		system("echo Userlog file does not exist. See query file. " .
					"> $output_file 2>&1");
		return;
	}

	# #######
	# Process the ulog file, saving all lines out of it pertaining to the jobid.
	# You know, this is really a stupid piece of code. condor_userlog should
	# have this functionality built into it.
	# #######
	open(FIN, "$ulogfile") || 
		die "Could not open user log file $ulogfile: $!";
	open(FOUT, ">$output_file") || 
		die "Could not open log trace file $output_file: $!";

	# Just copy the whole job user log even if it has other jobs in it
	while(defined($line = <FIN>)) {
		print FOUT "$line";
	}
	close(FOUT);
	close(FIN);

	# Don't ned this anymore since I did a valid query of the data.
	rmtree("$tmpfile");
}

# This is the main function which gathers all information for a single job
# specified by the passed in jobid.
sub acquire_all_info_for_jobid
{
	my $jobid = shift;
	my $jdir;

	die "No jobid for which to gather jobid info!" if !defined($jobid);

	# The place to store everything for this job id.
	$jdir = "$args{'scratch'}/$jobid";
	mkpath($jdir);

	# ##########
	# Get the job q.
	# ##########
	acquire_job_q($jobid, "$jdir/job_q");

	# ##########
	# Get the job ad.
	# ##########
	acquire_job_ad($jobid, "$jdir/job_ad", '-l');

	# If condor_q -l output does not contain ^ClusterId, our job is missing, check history
	open(CQF, "$jdir/job_ad") || die("Can't open $jdir/job_ad which I just created...\n");
	my @cql = <CQF>;
	close(CQF);

	if ($#cql < 5) {
		print "Job $jobid does not appear in condor_q output, looking in condor_history.\n";
		condor_history($jobid, "$jdir/job_ad", '-l');
		if ((-s "$jdir/job_ad") == 0) {
			print "Job $jobid does not appear in condor_history either.\n";
		} else {
			print "Job $jobid is in condor_history output, using it.\n";
		}
	}

	# ##########
	# Do the analysis for the job
	# ##########
	acquire_job_analysis($jobid, "$jdir/job_ad_analysis", '-better-analyze');

	# ##########
	# Grab all log entries for the jobid from the job log file
	# associated with the job if defined.
	# ##########
	acquire_job_userlog_lines($jobid, "$jdir/job_userlog_lines");

	#######
	# Copy the system log files, assuming we're on the submit side
	######
	system("cp `condor_config_val LOG`/MasterLog $args{'scratch'}/MasterLog");
	system("cp `condor_config_val LOG`/MasterLog.old $args{'scratch'}/MasterLog.old");
	system("cp `condor_config_val LOG`/ShadowLog $args{'scratch'}/ShadowLog");
	system("cp `condor_config_val LOG`/ShadowLog.old $args{'scratch'}/ShadowLog.old");
	system("cp `condor_config_val LOG`/SchedLog.old $args{'scratch'}/SchedLog.old");

	open(ADF, "$jdir/job_ad") || die("Can't open $jdir/job_ad which was just created\n");
	my $RemoteHost;
	while(<ADF>) {
		if (/^RemoteHost /) {
			$RemoteHost = $_;
		}

		if (/^LastRemoteHost /) {
			$RemoteHost = $_;
		}
	}
	chomp($RemoteHost);
	close(ADF);

	if (length($RemoteHost) > 0) {
		$RemoteHost = (split(/ /, $RemoteHost))[2];
		$RemoteHost =~ s/\"//g;
		print "The execute machine for this job is $RemoteHost";
		my $slot = (split(/@/, $RemoteHost))[0];
		if (length($slot) > 2) {
			$RemoteHost = (split(/@/, $RemoteHost))[1];
		}
		system("condor_fetchlog $RemoteHost MASTER > $args{'scratch'}/MasterLog.$RemoteHost");
		system("condor_fetchlog $RemoteHost STARTD > $args{'scratch'}/StartLog");
		system("condor_fetchlog $RemoteHost STARTER.$slot > $args{'scratch'}/StarterLog.$slot");
	} else {
		print "No Remote host found in job ad -- perhaps job this never ran?\n";
	}
}

sub acquire_all_info
{
	my $jobid = shift;
	die "No jobid for which to all info!" if !defined($jobid);

	# ##########
	# Identify myself (condor version, who ran this script, where, when, etc)
	# ##########
	acquire_identity("$args{'scratch'}/condor-profile.txt");

	# ##########
	# datamine extensive information about the jobid.
	# ##########
	acquire_all_info_for_jobid($jobid);
}

sub package_payload
{
	my $pwd;
	my $dir;

	$pwd = `pwd`;
	chomp $pwd;
	chdir("$args{'scratch'}");
	chdir("..");
	# grab the only directory there.
	$dir = `ls -1`;
	chomp $dir;
	print "\nCreating output file with all results in $dir.tar.gz\n";
	system("tar czf $dir.tar.gz $dir > /dev/null 2>&1");
	chdir($pwd);
	system("mv $args{'scratch'}/../*.tar.gz .");
}

sub main
{
	FindBin::again();

	parse_command_line();

	# get all of the info on the invocation machine, the user, and for the
	# specified jobid.
	acquire_all_info($args{'jobid'});

	# package up the scratch directory appropriately and move it to `pwd`.
	package_payload();

	# get rid of all of my on disk resources I used;
	cleanup();
	return 0;
}

exit main();