#!/home/jenkins/workspace/ccpem/ccp-em_centos5/devtools/install/devbin/python __author__ = "Felix Simkovic" __date__ = "01 June 2016" __version__ = 0.1 import argparse import conkit import logging import os import sys logging.basicConfig(format='%(message)s', level=logging.INFO) def main(): p = argparse.ArgumentParser() p.add_argument('-prefix', default='conkit', help='Job ID') p.add_argument('-wdir', default=os.getcwd(), help='working directory') p.add_argument('hhblitsdb', help='HHblits database') p.add_argument('fasta', help="fasta file") args = p.parse_args() logging.info('File prefix: {0}'.format(args.prefix)) logging.info('Working dir: {0}'.format(args.wdir)) logging.info('HHblits DB: {0}'.format(args.hhblitsdb)) logging.info('FASTA file: {0}'.format(args.fasta)) # Set the database variable hhblits_db = os.path.abspath(args.hhblitsdb) # Set the sequence file variable name = args.prefix seq_fname = os.path.abspath(args.fasta) a3m_fname = os.path.join(args.wdir, name+'.a3m') hhr_fname = os.path.join(args.wdir, name+'.hhr') jon_fname = os.path.join(args.wdir, name+'.jones') matrix_fname = os.path.join(args.wdir, name+'.mat') casprr_fname = os.path.join(args.wdir, name+'.rr') # Generate a multiple sequence alignment hhblits_cline = conkit.applications.HHblitsCommandLine(input=seq_fname, output=hhr_fname, database=hhblits_db, oa3m=a3m_fname, niterations=3, id=99, show_all=True, cov=60, diff='inf', maxfilt=500000) logging.info('Executing: {0}'.format(hhblits_cline)) hhblits_cline() # CCMpred requires alignments to be in the *jones* format - i.e. the format created # and used by David Jones in PSICOV msa_h = conkit.io.read(a3m_fname, 'a3m') logging.info('Total Number of sequences: {0}'.format(msa_h.nseqs)) try: logging.info('Number of effective sequences: {0}'.format(msa_h.calculate_meff())) except MemoryError: logging.info('Cannot calculate number of effective sequences: MemoryError') conkit.io.write(jon_fname, 'jones', msa_h) # Use the re-formatted alignment for contact prediction ccmpred_cline = conkit.applications.CCMpredCommandLine(alnfile=jon_fname, matfile=matrix_fname, threads=2, renormalize=True) logging.info('Executing: {0}'.format(ccmpred_cline)) ccmpred_cline() # Add sequence information to contact hierarchy con_h = conkit.io.read(matrix_fname, 'ccmpred') seq_h = conkit.io.read(seq_fname, 'fasta') # Do some post-prediction processing for visualisation and analysis for cmap, seq in zip(con_h, seq_h): cmap.sequence = seq cmap.remove_neighbors(min_distance=5, inplace=True) cmap.sort('raw_score', reverse=True, inplace=True) cmap = cmap[:cmap.sequence.seq_len] # subset the selection plot_fname = os.path.join(args.wdir, name + '.' + cmap.id +'.pdf') cmap.plot_map(file_format='pdf', file_name=plot_fname) logging.info('Plotted contact map: {0}'.format(plot_fname)) # Use the ccmpred parser to write a contact file conkit.io.write(casprr_fname, 'casprr', con_h) logging.info('Final prediction file: {0}'.format(casprr_fname)) if __name__ == "__main__": sys.exit(main())