#!/usr/bin/env python
'''Manipulate fasta or fastq format files.'''

#import built-in modules
import re
import sys
from string import maketrans
from random import shuffle
from heapq import nlargest
#import third-party modules

#changes to the paths

#changing history to this module


__author__ = "Liguo Wang"
__copyright__ = "Copyright 2010, Wei Li's Lab"
__credits__ = []
__license__ = "GPL"
__version__ = "1.0.1"
__maintainer__ = "Liguo Wang"
__email__ = "liguow@bcm.edu"
__status__ = "Development" #Prototype or Production


def S_diff(lst):
	'''Given a list of int or float, calculate S_point and S_diff.

	The cumulative sum of each value's deviation from the mean is built
	with a leading 0, so the cumulative series has len(lst) + 1 entries.

	lst: non-empty sequence of int or float.

	Returns a two-element list [S_point, S_diff]:
	  S_point - one-element list holding the index (into the cumulative
	            series) of its maximum; the first such index on ties.
	  S_diff  - maximum minus minimum of the cumulative series.

	Raises ZeroDivisionError if lst is empty.
	'''

	# Force true division: under Python 2, sum(ints) / len(lst) would
	# truncate the mean and skew every deviation computed from it.
	S_avg = sum(lst) / float(len(lst))
	S_cum = [0]	#cumulative sum of distances to the average
	for value in lst:
		S_cum.append(S_cum[-1] + (value - S_avg))
	# nlargest(1, ...) keeps the historical return shape: index wrapped in a list
	S_point = nlargest(1, range(len(S_cum)), key=lambda i: S_cum[i])
	return [S_point, max(S_cum) - min(S_cum)]

def bootstrap(lst,obs,rep=1000):
	'''Given a list of int or float (lst) and an observation value (obs), calculate the
	chance (pvalue) of getting this observation through bootstrapping.

	lst: sequence of int or float; it is copied, so the caller's list is left untouched.
	obs: observed value, compared against the S_diff magnitude of each shuffled copy.
	rep: number of random shuffles to perform.

	Returns the fraction p of shuffles whose S_diff magnitude is >= obs when p < 0.5,
	otherwise 1 - p.

	Raises ZeroDivisionError if rep is 0 (or, via S_diff, if lst is empty and rep > 0).
	'''

	# random.shuffle works in place; shuffle a private copy instead of the caller's list
	tmp = list(lst)
	count = 0
	for _ in range(rep):
		shuffle(tmp)
		# S_diff returns [S_point, S_diff]; only the scalar magnitude is comparable to obs
		if S_diff(tmp)[1] >= obs:
			count += 1

	# float() forces true division; under Python 2 count/rep would floor to 0 or 1
	pvalue = count / float(rep)
	if pvalue < 0.5:
		return pvalue
	return 1 - pvalue