#!/opt/chipster/tools/Python-2.7.12/bin/python # Time-stamp: <2011-03-15 12:32:34 Tao Liu> """Description: Call peaks from two wiggle file, one for treatment and one for control. Copyright (c) 2010,2011 Tao Liu This code is free software; you can redistribute it and/or modify it under the terms of the BSD License (see the file COPYING included with the distribution). @status: experimental @version: $Revision$ @author: Tao Liu @contact: taoliu@jimmy.harvard.edu """ # ------------------------------------ # python modules # ------------------------------------ import os import sys import re import logging from array import array from math import log as mathlog from math import asinh from optparse import OptionParser from MACS14.IO import WiggleIO from MACS14 import Prob from MACS14.Prob import * from MACS14.IO.FeatIO import * # ------------------------------------ # constants # ------------------------------------ logging.basicConfig(level=20, format='%(levelname)-5s @ %(asctime)s: %(message)s ', datefmt='%a, %d %b %Y %H:%M:%S', stream=sys.stderr, filemode="w" ) # ------------------------------------ # Misc functions # ------------------------------------ error = logging.critical # function alias warn = logging.warning debug = logging.debug info = logging.info # ------------------------------------ # Classes # ------------------------------------ # ------------------------------------ # Main function # ------------------------------------ def main(): usage = "usage: %prog [options]" description = "Call peaks from two wiggle file, one for treatment and one for control." optparser = OptionParser(version="%prog 0.1",description=description,usage=usage,add_help_option=False) optparser.add_option("-h","--help",action="help",help="Show this help message and exit.") optparser.add_option("-t","--tfile",dest="tfile",type="string", help="treatment wiggle file. *REQUIRED") optparser.add_option("-c","--cfile",dest="cfile",type="string", help="control wiggle file. *REQUIRED") optparser.add_option("-n","--name",dest="name",type="string", help="name of output files. *REQUIRED") optparser.add_option("-C","--cutoff",dest="cutoff",type="float", help="Cutoff depends on which method you choose. If method is pvalue, 50 means pvalue 1e-5. The default is 1.2 for foldchange",default=1.2) optparser.add_option("--gsize",dest="gsize",type="int", help="genome size. default: 100000000",default=100000000) optparser.add_option("-w","--window",dest="window",type="int", help="the window centered at each data point to check the local bias. default:1000",default=1000) optparser.add_option("-l","--min-length",dest="minlen",type="int", help="minimum length of peak, default: 300",default=300) optparser.add_option("-g","--maxgap",dest="maxgap",type="int", help="maximum gap between significant points in a peak, default: 50",default=50) optparser.add_option("-m","--method",dest="method",type="string", help="""scoring method can be either 'pvalue','asinhdiff', or 'fc'. After the adjusted average values from control data are calculated for each window surrounding every data point as the measurement of local bias, the following method will be applied to compute a score: 1) pvalue: The score will be the -10*log10(poisson pvalue) for each data point. The poisson pvalue = CDF(treatment_value, lambda=local_bias); 2) diff: The local bias will be deducted from the treatment values as scores after asinh transformation . 3) fc: The score will be the fold change/ratio between treatment value to local bias. The default is 'fc'.""", default="fc") (options,args) = optparser.parse_args() method = options.method.lower() if method == "pvalue": func = poisson_score elif method == "asinhdiff": func = diff_score elif method == "fc": func = fc_score else: error("Unrecognized scoring method: %s" % (method)) sys.exit(1) if not options.tfile or not options.cfile or not options.name: optparser.print_help() sys.exit() tf = options.tfile cf = options.cfile if not os.path.isfile(tf) or not os.path.isfile(cf): error("wiggle files are not valid!") sys.exit(1) try: tfhd = open(tf) cfhd = open(cf) except: error("Can't read wiggle files") sys.exit(1) try: bfhd = open(options.name+"_peaks.bed","w") except: error("Can't open %s to write" % options.name+"_peaks.bed") sys.exit(1) try: wfhd = open(options.name+"_scores.wig","w") except: error("Can't open %s to write" % options.name+"_scores.wig") sys.exit(1) info("open treatment wiggle file...") tio = WiggleIO.WiggleIO(tfhd) info("construct treatment wiggle track object...") ttrack = tio.build_wigtrack() tspan = ttrack.span tsum = ttrack.summary()[0] lambda_bg = tsum/options.gsize*tspan info("background average value: %.2f" % lambda_bg) info("open control wiggle file...") cio = WiggleIO.WiggleIO(cfhd) info("construct control wiggle track object...") ctrack = cio.build_wigtrack() csum = ctrack.summary()[0] tc_ratio = tsum/csum info("treatment/control = %.2f, this value will be used to adjust the local bias." % tc_ratio) info("construct control binkeeper object...") ctrack = cio.build_binKeeper() info("build pvalues based on local lambda calculation...") strack = build_scores(ttrack,ctrack,func=func, w=options.window,space=tspan,bg=lambda_bg,tc_ratio=tc_ratio) info("write scores to wiggle file...") strack.write_wig(wfhd,"scores",shift=0) wfhd.close() info("call peaks...") wpeaks = strack.call_peaks(cutoff=options.cutoff,min_length=options.minlen,max_gap=options.maxgap) info("write to bed file...") bfhd.write(wpeaks.tobed()) bfhd.close() info("finished") def build_scores (treat, control, func, w=1000, space=10, bg=0.0001,tc_ratio=1.0): chrs = treat.get_chr_names() scores = WigTrackI() scores.span = treat.span for chrom in chrs: t = treat.get_data_by_chr(chrom) try: c = control[chrom] except: # control doesn't have chrom continue info("Calculate lambdas for chromosome %s" % chrom) c_lambda = __lambda ( t,c, w = w, space=space,tc_ratio=tc_ratio, bg=bg ) info("Calculate scores") for i in xrange(len(t[0])): if t[0][i] == 0: continue s = func(t[1][i],c_lambda[i]) scores.add_loc(chrom,t[0][i],s) return scores def __lambda ( t, c, w = 10000, space=10, tc_ratio = 1.0, bg=0.00001): """Calculate local lambdas ct_ratio: ratio of control and treatment tags w : sliding window length * Hardcoded 100bps as the slocal region. 'w' is used as the llocal region """ l = len(t[0]) ret = array(FBYTE4,[]) reta = ret.append for p in t[0]: s = max(0,p-w/2) e = p+w/2 ss = max(0,p-50) se = p+50 try: values = c.pp2v(s,e) ave = max(bg,sum(values)/w*space*tc_ratio) except: ave = bg try: values = c.pp2v(ss,se) ave = max(ave,sum(values)/100*space*tc_ratio) except: pass reta(ave) return ret def poisson_score ( t, c ): p_tmp = poisson_cdf(t,c,lower=False) if p_tmp <= 0: log_pvalue = 3100 else: log_pvalue = mathlog(p_tmp,10) * -10 return log_pvalue def diff_score ( t, c ): return asinh(t)-asinh(c) def fc_score ( t, c ): if c == 0: return None else: return t/c if __name__ == '__main__': try: main() except KeyboardInterrupt: sys.stderr.write("User interrupt me! ;-) See you!\n") sys.exit(0)