#!/opt/chipster/tools/Python-2.7.12/bin/python
'''
Annotate splicing reads against gene model in two levels: reads level and  juncion level.
Note:
1) A read, especially long read, can be spliced 2 or more times
2) Multiple splicing reads spanning the same intron can be consolidated into one splicing junction.
'''

#import built-in modules
import os,sys
if sys.version_info[0] != 2 or sys.version_info[1] != 7:
	print >>sys.stderr, "\nYou are using python" + str(sys.version_info[0]) + '.' + str(sys.version_info[1]) + " RSeQC needs python2.7!\n"
	sys.exit()

import re
import string
from optparse import OptionParser
import warnings
import string
import collections
import math
import sets
from time import strftime
import subprocess

#import third-party modules
from bx.bitset import *
from bx.bitset_builders import *
from bx.intervals import *

#import my own modules
from qcmodule import SAM
#changes to the paths

#changing history to this module


__author__ = "Liguo Wang"
__copyright__ = "Copyleft"
__credits__ = []
__license__ = "GPL"
__version__="2.6.4"
__maintainer__ = "Liguo Wang"
__email__ = "wang.liguo@mayo.edu"
__status__ = "Production"


def printlog (mesg):
        '''print progress into stderr and log file'''
        mesg="@ " + strftime("%Y-%m-%d %H:%M:%S") + ": " + mesg
        LOG=open('class.log','a')
        print >>sys.stderr,mesg
        print >>LOG,mesg

def generate_bed12(infile,size=10):
	''' 
	infile: input file. eg: chrX    66766604        66788677        348     partial_novel
	size: the block size representing exons
	'''
	
	outfile = infile.replace('.xls','.bed')
	OUT = open(outfile,'w')
	for line in open(infile,'r'):
		if line.startswith('chrom'):continue
		line = line.strip()
		f = line.split()
		if len(f) != 5:continue
		chrom = f[0]
		start = int(f[1]) - size
		end = int(f[2]) + size
		score = int(f[3])
		strand = '.'
		name = f[4]
		thick_st = start
		thick_end = end
		if name == 'annotated':
			color = '205,0,0'
		elif name == 'partial_novel':
			color = '0,205,0'
		elif name == 'complete_novel':
			color = '0,0,205'
		else:
			color = '0,0,0'
		blockCount = 2
		blockSizes = ','.join((str(size),str(size)))
		blockStarts = '0,' + str(end-size-start)
		print >>OUT, '\t'.join( [str(i) for i in [chrom, start, end, name, score, strand, thick_st, thick_end,color, blockCount, blockSizes,blockStarts]])
		
	
def main():
	usage="%prog [options]" + '\n' + __doc__ + "\n"
	parser = OptionParser(usage,version="%prog " + __version__)
	parser.add_option("-i","--input-file",action="store",type="string",dest="input_file",help="Alignment file in BAM or SAM format.")
	parser.add_option("-r","--refgene",action="store",type="string",dest="ref_gene_model",help="Reference gene model in bed format. This file is better to be a pooled gene model as it will be used to annotate splicing junctions [required]")
	parser.add_option("-o","--out-prefix",action="store",type="string",dest="output_prefix",help="Prefix of output files(s). [required]")
	parser.add_option("-m","--min-intron",action="store",type="int",dest="min_intron",default=50, help="Minimum intron length (bp). default=%default [optional]")
	parser.add_option("-q","--mapq",action="store",type="int",dest="map_qual",default=30,help="Minimum mapping quality (phred scaled) for an alignment to be considered as \"uniquely mapped\". default=%default")

	(options,args)=parser.parse_args()
		
	if not (options.output_prefix and options.input_file and options.ref_gene_model):
		parser.print_help()
		sys.exit(0)
	if not os.path.exists(options.ref_gene_model):
		print >>sys.stderr, '\n\n' + options.ref_gene_model + " does NOT exists" + '\n'
		sys.exit(0)
	if os.path.exists(options.input_file):
		obj = SAM.ParseBAM(options.input_file)
		obj.annotate_junction(outfile=options.output_prefix,refgene=options.ref_gene_model,min_intron=options.min_intron, q_cut = options.map_qual)
		try:
			subprocess.call("Rscript " + options.output_prefix + '.junction_plot.r', shell=True)
		except:
			print >>sys.stderr, "Cannot generate pdf file from " + '.junction_plot.r'
			pass
	else:
		print >>sys.stderr, '\n\n' + options.input_file + " does NOT exists" + '\n'
		sys.exit(0)
	try:
		generate_bed12(options.output_prefix + '.junction.xls')	
	except:
		pass	


if __name__ == '__main__':
        main()