#! /bin/sh
# This is the LHEA perl script: /cvmfs/extras-fp7.egi.eu/extras/heasoft/heatools/x86_64-unknown-linux-gnu-libc2.19-0/bin/ftjoin
# The purpose of this special block is to make this script work with
# the user's local perl, regardless of where that perl is installed.
# The variable LHEAPERL is set by the initialization script to
# point to the local perl installation.
#-------------------------------------------------------------------------------
eval '
if [ "x$LHEAPERL" = x ]; then
  echo "Please run standard LHEA initialization before attempting to run /cvmfs/extras-fp7.egi.eu/extras/heasoft/heatools/x86_64-unknown-linux-gnu-libc2.19-0/bin/ftjoin."
  exit 3
elif [ "$LHEAPERL" = noperl ]; then
  echo "During LHEA initialization, no acceptable version of Perl was found."
  echo "Cannot execute script /cvmfs/extras-fp7.egi.eu/extras/heasoft/heatools/x86_64-unknown-linux-gnu-libc2.19-0/bin/ftjoin."
  exit 3
elif [ `$LHEAPERL -v < /dev/null 2> /dev/null | grep -ic "perl"` -eq 0 ]; then
  echo "LHEAPERL variable does not point to a usable perl."
  exit 3
else
  # Force Perl into 32-bit mode (to match the binaries) if necessary:
  if [ "x$HD_BUILD_ARCH_32_BIT" = xyes ]; then
    if [ `$LHEAPERL -V 2> /dev/null | grep -ic "USE_64_BIT"` -ne 0 ]; then
      VERSIONER_PERL_PREFER_32_BIT=yes
      export VERSIONER_PERL_PREFER_32_BIT
    fi
  fi
  exec $LHEAPERL -x $0 ${1+"$@"}
fi
'
if(0);
# Do not delete anything above this comment from an installed LHEA script!
#-------------------------------------------------------------------------------
#!/usr/bin/perl
#
# ftjoin - Join the columns of two tables according to a matching criterium
#
# The ftjoin task joins the columns of two tables into one table, using
# an arbitrary matching expression.  In the languages of databases, this
# is known as a "database join."  The two tables to be joined are
# designated the "left" and "right" tables.
#
# See the help file for more user documentation ...
#
# METHOD OF OPERATION
# 
# Consider two tables, L and R, with N and M rows respectively:
#
#   {     L1    }      {     R1    }   where Li is the ith row of L
#   {     .     }      {     .     }   and   Rj is the jth row of R   
#   {     .     }      {     .     }
#   {     LN    }      {     .     }
#                      {     RM    }
#
# A combined table is made with the columns of both tables, and M
# rows, and the column names are renamed by the renaming algorithm.
# The R table is filled in, but not the L table.  Also, a column is
# made to hold the row number of the right hand table,
#
#   {   blank    |      R1     | 1 }
#   {   blank    |      .      | 2 }
#   {   blank    |      .      | 3 }
#   {   blank    |      .      | . }
#   {   blank    |      RM     | M }
#
# Then, one at a time, each row of the table L is filled into the
# combined table.  First,
#
#   {    L1      |      R1     | 1 }
#   {    L1      |      .      | . }
#   {    .       |      .      | . }
#   {    L1      |      RM     | M }
#
# and then the matching function is performed.  Any matches are sent
# to the output table.  There is a special case for the LEFT OUTER or
# FULL OUTER join, which if there is no match, sends a single row to
# the output with null values for the right hand side.
#
# Next, the 2nd row is filled in,
#
#   {    L2      |      R1     | 1 }
#   {    L2      |      .      | . }
#   {    .       |      .      | . }
#   {    L2      |      RM     | M }
#
# and the matching is repeated, etc. until all the rows of the left
# hand table have been compared.  
#
# If performing a RIGHTOUTER or FULLOUTER join, a check is done to
# find out which rows of the right table haven't been saved, and those
# are saved, with null values filling in the left hand side of the
# table.
#
# For efficiency, the combined table is maintained in memory.
#
# $Id: ftjoin,v 1.10 2011/08/30 21:01:18 craigm Exp $
#
# $Log: ftjoin,v $
# Revision 1.10  2011/08/30 21:01:18  craigm
# Now add warnings if the user requests an 'OUTER' join (i.e. union) when column-null values are not defined; Add history keywords if requested by user; unit tests still pass --CM
#
# Revision 1.9  2009/11/16 09:38:53  craigm
# Reinitialize variable to empty; fixes a bug with FULLOUTER and RIGHTOUTER modes, where too many rows were written to the output table (with duplicates); unit tests now pass --CM
#
# Revision 1.8  2009/08/26 02:01:37  craigm
# Allow users to use the expressions #L_ROW and #R_ROW to easily work with row numbers of left and right table --CM
#
# Revision 1.7  2006/05/05 18:52:06  craigm
# Bump to version 1.1; allow jointype=INTERSECTION and jointype=UNION; additional documentation; unit tests still pass --CM
#
# Revision 1.6  2006/04/07 20:52:48  craigm
# Add a CVS id and log variable --CM
#
#

use HEACORE::HEAINIT;

# ===================================
# Execute main subroutine, with error trapping
$status = 0;
undef $memfile;
eval {
    $status = headas_main(\&ftjoin);
};

# ===================================
# Check for errors and report them to the console
if ($@) {
    if ($status == 0) { $status = -1; }
    warn $@;
    exit $status;
}
exit 0;

# ===================================
# Utility routine to return the number of rows in a FITS table
sub nrows {
    my ($fits,$status) = (@_);
    my ($nrows);

    $fits->get_num_rows($nrows, $status);
    $_[1] = $status;
    return $nrows;
}

# ===================================
# Utility routine to return the number of columns in a FITS table
sub ncols {
    my ($fits,$status) = (@_);
    my ($ncols);

    $fits->get_num_cols($ncols, $status);
    $_[1] = $status;
    return $ncols;
}


# ===================================
# Main subroutine
sub ftjoin {
    # Makes all environment variables available
    use Env;
    use Cwd;

    # The HEAUTILS module provides access to HDpar_stamp()
    # set_toolname(), set_toolversion(), and headas_clobberfile()
    use HEACORE::HEAUTILS;
    use HEACORE::PIL;
    # include the file specification functions
    use Astro::FITS::CFITSIO qw( :shortnames :constants );

    my $taskname = "ftjoin";
    my $version = "1.2";

    # Use the standard HEAdas methods for registering the toolname and version number to be
    # used in error reporting and in the record of parameter values written by HDpar_stamp
    set_toolname($taskname);
    set_toolversion($version);

    eval {
	$status = &ftjoin_work();
    };

    # Make a special effort to close the temporary file, so that it
    # can be inspected for debugging purposes.
    if (defined($memfile)) {
	$mystatus = 0;
	$memfile->close_file($mystatus);
	undef $memfile;
    }

    if ($@) {
	if ($status == 0) { $status = -1; }
	warn $@;
	return $status;
    }

    return $status;
}

sub ftjoin_work {

    # --------------- parameters
    ($status = PILGetFname('leftfile', $leftfile)) == 0 || die "error getting leftfile parameter";
    ($status = PILGetFname('rightfile', $rightfile)) == 0 || die "error getting rightfile parameter";
    ($status = PILGetFname('outfile', $outfile)) == 0 || die "error getting outfile parameter";


    ($status = PILGetString('expr', $selexpr)) == 0 || die "error getting 'expr' parameter";

    ($status = PILGetString('leftnameprefix', $leftnameprefix)) == 0 || die "error getting 'leftnameprefix' parameter";
    ($status = PILGetString('leftnamesuffix', $leftnamesuffix)) == 0 || die "error getting 'leftnamesuffix' parameter";
    ($status = PILGetString('rightnameprefix', $rightnameprefix)) == 0 || die "error getting 'rightnameprefix' parameter";
    ($status = PILGetString('rightnamesuffix', $rightnamesuffix)) == 0 || die "error getting 'rightnamesuffix' parameter";

    ($status = PILGetString('outcolumns', $outcolumns)) == 0 || die "error getting 'outcolumns' parameter";

    ($status = PILGetBool('cleanup', $cleanup)) == 0 || die "error getting 'cleanup' parameter";
    ($status = PILGetString('debugfile', $debugfile)) == 0 || die "error getting 'debugfile' parameter";
    ($status = PILGetBool('dupcolnames', $dupcolnames)) == 0 || die "error getting 'dupcolnames' parameter";

    ($status = PILGetString('jointype', $jointype)) == 0 || die "error getting 'jointype' parameter";
    ($status = PILGetInt('chatter', $chatter)) == 0 || die "error getting chatter parameter";
    if ($chatter >= 5) { $verbose = 1; }

    # Reset the match values in the parameter file
    PILPutInt('nbothmatch',0);
    PILPutInt('nleftmatch',0);
    PILPutInt('nrightmatch',0);

    # ---------------- error checking of parameters

    # Check if output is a pipe; we have to do something different
    # about the scratch files then.
    $outfile1 = $outfile;
    if ($debugfile =~ m/none/i) { undef $debugfile; }

    $jointype =~ s/ //g;
    $jointype =~ tr/a-z/A-Z/;
    print "jointype=$jointype\n" if ($verbose);

    # Mix in standard set-wise aliases
    if ($jointype =~ m/^INTERSECT/) { $jointype = "INNER"; }
    if ($jointype eq "UNION")       { $jointype = "FULLOUTER"; }

    if ($jointype ne "INNER" && $jointype ne "LEFTOUTER" &&
	$jointype ne "RIGHTOUTER" && $jointype ne "FULLOUTER") {
	die "ERROR: jointype must be one of INNER, \"LEFT OUTER\", \"RIGHT OUTER\", or \"FULL OUTER\"";
    }

    if ($leftnameprefix  =~ m/none/i) { $leftnameprefix = ""; }
    if ($leftnamesuffix  =~ m/none/i) { $leftnamesuffix = ""; }
    if ($rightnameprefix =~ m/none/i) { $rightnameprefix = ""; }
    if ($rightnamesuffix =~ m/none/i) { $rightnamesuffix = ""; }

    $status = 0;

    $leftrownum  = "LROW_$$";
    $rightrownum = "RROW_$$";

    $leftfile1  = "$leftfile";
    $rightfile1 = "$rightfile";

    # ------------- Open the input files
    Astro::FITS::CFITSIO::fits_open_table($left,"$leftfile1",READONLY,$status);
    $left->get_hdu_type($lhdutype,$status);
    die "ERROR: could not open $leftfile1" if ($status);
    Astro::FITS::CFITSIO::fits_open_table($right,"$rightfile1",READONLY,$status);
    $right->get_hdu_type($rhdutype,$status);
    die "ERROR: could not open $rightfile1" if ($status);

    # Check the table type
    if ($lhdutype != $rhdutype) {
	$status = NOT_BTABLE;
	die "ERROR: input tables must be of the same type (binary or ASCII)";
    }

    # ------------- 
    # Temporary table for doing the selections
    if ($debugfile) {
	headas_clobberfile("$debugfile");
	$memfile = Astro::FITS::CFITSIO::create_file("$debugfile",$status);
    } else {
	$memfile = Astro::FITS::CFITSIO::create_file("mem://tmp",$status);
    }
    die "ERROR: could not create temporary file" if ($status);
    $left->copy_hdu($memfile,0,$status);
    $nrows = &nrows($memfile,$status);
    $ncols = &ncols($memfile,$status);
    $left ->read_key(TLONG,"NAXIS1",$naxis_left, $comment,$status);
    $right->read_key(TLONG,"NAXIS1",$naxis_right,$comment,$status);

    # ------------- Remove all the rows
    $memfile->delete_rows(1,$nrows,$status);
    # Create a column to hold the left table row number
    die "ERROR: could not create internal table" if ($status);

    # ------------- 
    # Copy all the column related keywords to the temporary file,
    # but be sure to re-write the column numbers
    $nkeys = 0;
    $right->get_hdrspace($nkeys,$morekeys,$status);
    foreach $keynum (1 .. $nkeys) {
	$right->read_record($keynum,$card,$status);
	$keyname = substr($card,0,8);
	print " keyname = $keyname  " if ($verbose);
	next if ($keyname eq "        ");
	# Find column-related keywords:
	#                   TAAAAnA |iCAAAna   |ijPC/CDna   |iV/Sn_ma  |WCSAX/SNna
	if ($keyname =~ m/^(T.*[A-Z]|\dC.*[A-Z]|\d\d[PC][CD]|\d[VS]\d+_|WC[AS][XN]|LONP|LATP|EQUI|MJDOB|MJDA|RADE|DAVG)(\d+)([A-Z])? */) {
	    $keyroot = $1;
	    $colnum  = $2;
	    $wcscoor = $3;
	    $newcolnum = $colnum + $ncols;
	    $newkeyname = $keyroot . $newcolnum . $wcscoor;
	    print "   --> $newkeyname  " if ($verbose);

	    # Special handling for TBCOLn keywords, which need to be updated
	    # for the new character position
	    if ($lhdutype == ASCII_TBL && $keyroot eq "TBCOL") {
		$right->read_keyn($keynum,$keyname,$value,$comment,$status);
		$value += $naxis_left;
		$memfile->update_key(TLONG,$newkeyname,$value,$comment,$status);
	    } else {
		$newcard = sprintf("%-8s%s",$newkeyname, substr($card,8));
		print "\n    :$newcard:" if ($verbose);
#		$memfile->write_record($newcard,$status);
		$memfile->update_card($newkeyname,$newcard,$status);
	    }
	}
	print "\n" if ($verbose);
    }
    die "ERROR: could not transfer RIGHT keywords to internal table" if ($status);

    # ------------- Update the NAXIS1 and TFIELDS values

    $ncols_left  = &ncols($left,$status);
    $ncols_right = &ncols($right,$status);
    $nrows_left  = &nrows($left,$status);
    $nrows_right = &nrows($right,$status);
    $naxis_tot   = $naxis_left+$naxis_right;
    $ncols_tot   = $ncols_left+$ncols_right;
    $memfile->update_key(TLONG,"NAXIS1",$naxis_tot," Number of bytes per row",
		     $status);
    $memfile->update_key(TLONG,"TFIELDS",$ncols_tot,undef,$status);

    if ($chatter >= 2) {
	print "Left:  $ncols_left columns; $nrows_left rows; $naxis_left bytes/row;\n";
	print "Right: $ncols_right columns; $nrows_right rows; $naxis_right bytes/row;\n";
    }

    # ------------- Perform the column renaming
    for $col (1 .. $ncols_tot) {
	# Read original column name
	$memfile->read_key(TSTRING,"TTYPE$col",$colname,$comment,$status);
	print "  colname $col = $colname  " if ($verbose);
	last if ($status);

	# Re-name column using template rules
	if ($col <= $ncols_left) {
	    $newcolname = $leftnameprefix . $colname . $leftnamesuffix;
	} else {
	    $newcolname = $rightnameprefix . $colname . $rightnamesuffix;
	}
	$colnames[$col] = "$newcolname";

	# Update column name
	if ($colname ne $newcolname) {
	    $memfile->update_key(TSTRING,"TTYPE$col",$newcolname,undef,$status);
	    print "     --> $newcolname  " if ($verbose);
	}
	print "\n" if ($verbose);

	# Check for duplicates
	if ($collist{$newcolname}) {
	    warn "Duplicate columns named $newcolname are present" if ($dupcolnames);
	    die  "Duplicate columns named $newcolname are present" if (!$dupcolnames);
	}

	# Record this name
	$collist{$newcolname} = 1;
    }
    die "ERROR: Could not create internal table 1 (status = $status)" if ($status);

    # Update internal table structure
    $memfile->set_hdustruc($status);

    # Insert two new columns to contain the row numbers
    if ($lhdutype == ASCII_TBL) {
	$tform_long = "I12";
    } else {
	$tform_long = "1J";
    }
    $lrowcol = $ncols_tot + 1; $rrowcol = $ncols_tot + 2;
    $memfile->insert_col($lrowcol,"$leftrownum",$tform_long,$status);
    $memfile->update_key(TLONG,"TNULL$lrowcol",-1," null value",$status);
    $memfile->insert_col($rrowcol,"$rightrownum",$tform_long,$status);
    $memfile->update_key(TLONG,"TNULL$rrowcol",-1," null value",$status);

    die "ERROR: Could not create internal table 2 (status = $status)" if ($status);

    # ------------- 
    # Create the output file and copy the blank temporary file to it
    headas_clobberfile("$outfile1");
    $out = Astro::FITS::CFITSIO::create_file("$outfile1",$status);
    die "ERROR: could not create output file" if ($status);
    $memfile->copy_hdu($out,0,$status);
    die "ERROR: could not copy internal table to output (status=$status)" 
	if ($status);

    # ------------- Copy the right table into the temporary table
    $memfile->insert_rows(0,$nrows_right,$status);
    foreach $row (1 .. $nrows_right) {
	$right->read_tblbytes ($row,1,$naxis_right,$values,$status);
	$memfile  ->write_tblbytes($row,$naxis_left+1,
			       $naxis_right,$values,$status);
    }
    $right->close_file($status);
    die "ERROR: Could not copy RIGHT table to internal buffer (status=$status)"
	if ($status);

    # Add row numbers
    @rrow = (1 .. $nrows_right);
    $memfile->write_col(TLONG, $rrowcol, 1, 1, $#rrow+1, \@rrow, $status);
    die "ERROR: Could not add row numbers to internal buffer (status=$status)"
	if ($status);

    # Check if selection expression refers to row number
    if ($selexpr =~ m/#L_ROW\b/i) {
      $has_lrowname = 1;
      $selexpr =~ s/#L_ROW\b/$leftrownum/ig;
    }
    if ($selexpr =~ m/#R_ROW\b/i) {
      $has_rrowname = 1;
      $selexpr =~ s/#R_ROW\b/$rightrownum/ig;
    }

    # Initialize statistics
    $nbothmatch = 0;
    $nleftmatch = 0;
    $nrightmatch = 0;
    
    # ================================================================
    # ================================================================
    # ------------- Main loop
    $nrows_tmp = 0;
    foreach $row (1 .. $nrows_left) {

	if ($row % 100 == 1 && $verbose) {
	    print "\nRow $row: ";
	}
	print "." if ($verbose);

	# ------------ Read one row from left and write it to all rows of tmp
	$left->read_tblbytes ($row,1,$naxis_left,$values,$status);
	# Pack them once so that we avoid the overhead of repeated
	# re-packings.
	$values = pack("C*",@$values);
	for $trow (1 .. $nrows_right) {
	    $memfile  ->write_tblbytes($trow,1,$naxis_left,\$values,$status);
	}
	# Write left column number if we are selecting on it.
	if ($has_lrowname) {
	  @rrow = ($row) x $nrows_right;
	  $memfile->write_col(TLONG, $lrowcol, 1, 1, $#rrow+1, \@rrow, $status);
	}
	
	# ------------ Perform selection operation
	$memfile->select_rows($out,"$selexpr",$status);
	if ($status) {
	    die "ERROR: selection expression '$selexpr' failed with error code $status.  Did you remember to use the 'renamed' column names in the filter expression?";
	}

	# ------------ Special process for outer joins
	$nrows = &nrows($out,$status);
	$nbothmatch += ($nrows-$nrows_tmp);
	if ($nrows == $nrows_tmp && 
	    ($jointype eq "LEFTOUTER" || $jointype eq "FULLOUTER")) {
	    $nleftmatch ++;

	    # Mis-match on the left table: write the left table
	    print "n" if ($verbose);
	    $out->insert_rows($nrows,1,$status);
	    # Write the left table data
	    $out->write_tblbytes($nrows+1,1,$naxis_left,\$values,$status);

	    # Write nulls for the right columns; precompute them once
	    # so we don't have to keep re-writing null values in a 
	    # slow loop.
	    if ($nullbytes) {
		# Pre-computed null row
		$out->write_tblbytes($nrows+1,$naxis_left+1,
				     $naxis_right,\$nullbytes,$status);
	    } else {
		foreach $tcol ($ncols_left+1 .. $ncols_left+$ncols_right) {
		    $out->get_coltype($tcol,$typecode,$repeat,$width,$status);
		    if ($typecode == TSTRING) {
			#  Note that strings are treated differently 
			$repeat /= $width;
		    }
		    $out->write_col_null($tcol,$nrows+1,1,$repeat,$status);
		    if ($status == NO_NULL) {
		      if (! defined($nullwarn{$tcol}) ) {
			warn "WARNING: no null value is defined for output column $tcol ($colnames[$tcol]); there may be garbage values for that output column.";
			$nullwarn{$tcol} = 1;
		      }
		      $status = 0;
		    }
		}
		# Save this set of nulls for the next time
		$out->read_tblbytes($nrows+1,$naxis_left+1,
				    $naxis_right,$nullbytes,$status);
		$nullbytes = pack("C*",@$nullbytes);
	    }
	}

	if ($nrows > $nrows_tmp && $verbose) { print "+"; }
	$nrows_tmp = &nrows($out,$status);
    }
    print "\n" if ($verbose);

    # End of main loop
    # ==================================================================
    # ==================================================================

    $left->close_file($status);
    $nrows = &nrows($out,$status);

    # Follow-up work if the join is FULL or RIGHT outer.  In that
    # case, we have to make a sweep to be sure that we haven't missed
    # any right-side rows.

{
    if (($jointype eq "RIGHTOUTER" || $jointype eq "FULLOUTER") &&
	($status == 0)) {
	$nrows_out = &nrows($out,$status);

	# Read the right equivalent of #ROW
	undef @rrow;
	$out->get_colnum(CASEINSEN,"$rightrownum",$colnum,$status);
	$out->get_coltype($colnum,$typecode,$repeat,$width,$status);
	$out->read_col($typecode,$colnum,1,1,$nrows_out,undef,
		       \@rrow,undef,$status);

	# ----------- Check whether each right row was output or not
	if ($status == 0) {
	    # Find the selected right-hand rows
	    foreach $value (@rrow) {
		$xrows{$value} = 1 if ($value >= 1 && $value <= $nrows_right);
	    }
	    @rrow = sort {$a <=> $b} keys(%xrows);
	    $status = 0;
	    print "number of rows already written = $#rrow + 1\n" if ($verbose);

	    # Delete the rows in the temporary table which have already
	    # been written;
	    print "deleting rows...\n" if ($verbose);
	    $memfile->delete_rowlist(\@rrow, $#rrow+1, $status);

	    # Write the first row of nulls into the left hand side
	    # of the table
	    $nrows_tmp = &nrows($memfile,$status);
	    print "remaining rows in memory file = $nrows_tmp + 1\n" if ($verbose);
	    $nrightmatch += $nrows_tmp;
	    if ($status == 0 && $nrows_tmp > 0) {
		foreach $tcol (1 .. $ncols_left) {
		    $memfile->get_coltype($tcol,$typecode,$repeat,$width,$status);
		    if ($typecode == TSTRING) {
			#  Note that strings are treated differently 
			$repeat /= $width;
		    }
		    $memfile->write_col_null($tcol,1,1,$repeat,$status);
		    if ($status == NO_NULL) {
		      if (! defined($nullwarn{$tcol}) ) {
		        warn "WARNING: no null value is defined for output column $tcol ($colnames[$tcol]); there may be garbage values for that output column.";
			$nullwarn{$tcol} = 1;
		      }
		      $status = 0;
		    }
		}
		# Save this set of nulls for the following rows
		$memfile->read_tblbytes(1,1,$naxis_left,$nullbytes,$status);
		$nullbytes = pack("C*",@$nullbytes);
		# Write the remaining null bytes wholesale
		foreach $row (2 .. $nrows_tmp) {
		    $memfile->write_tblbytes($row,1,$naxis_left,
					  \$nullbytes,$status);
		}
		# Transfer this data to the output
		$memfile->select_rows($out,"1 == 1",$status);
	    }
	}
    }
}

# {
#     if ("$outcolumns" ne "*") {
# 	
#     }
# }

    # Update the statistics in the parameter file
    PILPutInt('nbothmatch',$nbothmatch);
    PILPutInt('nleftmatch',$nleftmatch);
    PILPutInt('nrightmatch',$nrightmatch);
    if ($chatter >= 2) {
	print "Matches: $nbothmatch both; $nleftmatch left; $nrightmatch right;\n";
    }

    if ($cleanup) {
	print "Cleaning up output table (columns $lrowcol & $rrowcol)...\n" if ($verbose);
	# Order is important, because we are deleting them in place
	$out->delete_col($rrowcol,$status);
	$out->delete_col($lrowcol,$status);
    }
    $memfile->close_file($status);
    undef $memfile;

    # Write history keywords
    HDpar_stamp($out, 0, $status);
    $out->close_file($status);

    return $status;
}