#!/usr/bin/env python

#import built-in modules
import os,sys
import re
import string
from optparse import OptionParser
import warnings
import string
import collections
import math

#import third-party modules

#changes to the paths

#changing history to this module
#05/26/2011: suppport multiple spliced mapped reads

__author__ = "Liguo Wang"
__copyright__ = "Copyright 2010, Wei Li's Lab"
__credits__ = []
__license__ = "GPL"
__version__ = "1.1.0"
__maintainer__ = "Liguo Wang"
__email__ = "liguow@bcm.edu"
__status__ = "Development" #Prototype or Production


def RSS(arg):
	'''calculate Square root of sum of square. Input is ',' separated numbers'''
	lst=arg.split(',')
	lst_sum=0
	for i in [ int(i)**2 for i in lst]:
		lst_sum += i 
	#nsr=10*math.log10((1+noi_sum**0.5)/(1+sig_sum**0.5))
	return lst_sum**0.5
	
def H_mean(arg):
	'''calculate harmornic mean. Input is ',' separated numbers'''
	lst=[1/float(i) for i in arg.split(',') if float(i) !=0]
	if len(lst) == 0:
		return "NA"
	else:
		return len(lst)/(sum(lst))

def shannon_entropy(arg):
	'''calculate shannon's entropy (or Shannon-Wiener index).'''
	lst=arg
	lst=[float(i) for i in lst if float(i)>0]
	entropy=0.0
	for i in lst:
		entropy += (i/sum(lst)) * math.log((i/sum(lst)))
	if entropy == 0:
		return 0
	else:
		return -entropy

	
def shannon_entropy_es(arg):
	'''calculate estimator of shannon's entropy (Chao & Shen, 2003)'''
	lst=arg
	lst=[float(i) for i in lst if float(i)>0]
	if sum(lst)<=0 or min(lst)<0:return "NA"	#if there is no fragmental splicing
	if (len(lst)==1): return 0					#if there is only 1 fragmental splicing
	lst.append(2)
	
	#estimate C_bar
	singleton=0
	entropy=0.0
	for i in lst:
		if i ==1:singleton +=1
	
	C_bar = 1- (singleton/sum(lst))
	for i in lst:entropy += ( (C_bar*i/sum(lst)) * math.log((C_bar*i/sum(lst))) )/(1-(1-C_bar*i/sum(lst))**sum(lst))
	if entropy == 0:
		return 0
	else:
		return -entropy

def shannon_entropy_ht(arg):
	'''calculate estimator of shannon's entropy based on Horzitz-Thompson'''
	lst=arg.split(',')
	lst=[float(i) for i in lst if float(i)>0]
	if sum(lst)<=0 or min(lst)<0:return "NA"	#if there is no fragmental splicing
	if (len(lst)==1): return 0					#if there is only 1 fragmental splicing
	
	#estimate C_bar
	entropy=0.0
	for i in lst:
		entropy += ( (i/sum(lst)) * math.log((i/sum(lst))) )/(1-(1-i/sum(lst))**sum(lst))
	return -entropy
	
def simpson_index(arg):
	'''calculate Gini-Simpson's index. Input is ',' separated numbers'''
	lst=arg.split(',')
	lst=[float(i) for i in lst if float(i)>0]
	simpson=0.0
	
	try:
		for i in lst:
			simpson = simpson + (i/sum(lst))**2
		return 1-simpson
	except: return 0
	
def simpson_index_es(arg):
	'''calculate estimator Gini-Simpson's index. Input is ',' separated numbers'''
	lst=arg.split(',')
	lst=[float(i) for i in lst if float(i)>0]
	simpson=0.0
	
	try:
		for i in lst:
			simpson = simpson + i*(i-1)
		return 1- (simpson/(sum(lst)*(sum(lst)-1)))
	except: return 0
	
def Hill_number(arg,qvalue=1):
	'''Calculate real diversity (Hill's number). Input is ',' separated numbers. qvalue is the only
	parameter for Hill's function. When q=1, it return exp(H) which is the effective number of junctions
	calculated by Shannon's entropy. When q<1, Hill's function was favors low frequency junctions. 
	When q>1, Hill's function was favors high frequency junctions (common junctions). Simpon's Index
	is particular case of Hill's function as q=2'''
	
	lst=arg.split(',')
	lst=[float(i) for i in lst if float(i)>0]
	freq=[(i/sum(lst))**qvalue for i in lst]
	try:
		return (sum(freq))**(1/(1-qvalue))
	except:
		return math.exp(shannon_entropy(arg))
import math
import functools

def percentile(N, percent, key=lambda x:x):
    """
    Find the percentile of a list of values.

    @parameter N - is a list of values. Note N MUST BE already sorted.
    @parameter percent - a float value from 0 to 100.
    @parameter key - optional key function to compute value from each element of N.

    @return - the percentile of the values
    """
    if not N:
        return None
    k = (len(N)-1) * percent/100.0
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        return key(N[int(k)])
    d0 = key(N[int(f)]) * (c-k)
    d1 = key(N[int(c)]) * (k-f)
    return d0+d1
	
def percentile_list(N):
	"""
	Find the percentile of a list of values.
	@parameter N - is a list of values. Note N MUST BE already sorted.
    @return - the list of percentile of the values
	"""
	if not N:return None
	if len(N) <100: return N
	per_list=[]
	for i in range(1,101):
		k = (len(N)-1) * i/100.0
		f = math.floor(k)
		c = math.ceil(k)
		if f == c:
			per_list.append( int(N[int(k)])  )
		else:
			d0 = N[int(f)] * (c-k)
			d1 = N[int(c)] * (k-f)
			per_list.append(int(round(d0+d1)))	
	return per_list