#!/usr/bin/env ccp4-python
#
#     Copyright (C) 2016 Ronan Keegan
#
#     This code is distributed under the terms and conditions of the
#     CCP4 Program Suite Licence Agreement as a CCP4 Application.
#     A copy of the CCP4 licence can be obtained by writing to the
#     CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK.
#
# A simple sequence identity calculator
# Ronan Keegan - 03/4/16
#

import string, sys


class simpleSeqID:
    """
    A class to calculate a simple sequence identity (local & global)

    local  = over the aligned part of the sequence
    global = over the entire sequence of the target
    """

    def __init__(self):
        self.inputALNfile = ""
        self.inputTARfile = ""

    def getPercent(self, inputALNseq, inputTARseq, fullTARseq):
        """
        Generate seqID's for model-target

        inputALNseq -- aligned model sequence; '-' marks a gap
        inputTARseq -- aligned target sequence, compared with inputALNseq
                       column by column
        fullTARseq  -- complete (unaligned) target sequence

        Returns a tuple (percent, overallPercent):
          percent        -- integer percentage of the non-gap residues of
                            inputALNseq that are identical to the residue in
                            the same column of inputTARseq (rounded down)
          overallPercent -- percent scaled by len(inputALNseq)/len(fullTARseq)
                            and truncated to an integer

        percent is 0 when inputALNseq has no non-gap residues, and
        overallPercent is 0 when fullTARseq is empty, instead of raising
        ZeroDivisionError.
        """

        # Count identity hits column by column. zip() stops at the shorter
        # sequence, so alignment columns lying beyond the end of inputTARseq
        # simply count as mismatches rather than raising IndexError.
        hits = sum(1 for alnRes, tarRes in zip(inputALNseq, inputTARseq)
                   if alnRes != '-' and alnRes == tarRes)

        # Number of real (non-gap) residues in the aligned sequence
        alnString = str(inputALNseq)
        residueCount = len(alnString) - alnString.count('-')

        # Floor division keeps the result an integer under both Python 2 and
        # Python 3; guard against an empty / all-gap alignment.
        if residueCount:
            percent = 100 * hits // residueCount
        else:
            percent = 0

        # Workout overall alignment identity over entire sequence of target
        alnLength = len(inputALNseq)
        tarLength = len(fullTARseq)
        if tarLength:
            overallPercent = int(percent * (float(alnLength) / float(tarLength)))
        else:
            overallPercent = 0

        return percent, overallPercent


if __name__ == "__main__":

    inputALNseq = "VAPARGDALDCGCGSGQASLGLAEFFERVHAVDPGEAQIRQ-----ALRHPRVTYAVAPAEDTGLPPASVDVAIAAQAMHWFDLDRFWA---ELRRVARPGAVFAA"
    inputTARseq = "AGLIQGAVLDAGCGTGEDALHLAGLGYAVTGLDLSPTAISVARDKADARGLGAVFEVADALDLTGWEERFDTVIDSGLAHTFEGDRLRAYATALHRACRPGAVAHI"
    fullTARseq = (
        "MGSSHHHHHHSSGLVPRGSHMTEVFDAVYRGESPFGKRPPWDIGAPQPAYVALEKAGLIQGAVLDAGCGTGEDALHLAGL"
        "GYAVTGLDLSPTAISVARDKADARGLGAVFEVADALDLTGWEERFDTVIDSGLAHTFEGDRLRAYATALHRACRPGAVAH"
        "ILSISDRGSAEMQARLAEAIDEIPAPLPDDDESPTLKRSADHLRDGFAEGWTIESIDESLMRGVIPTTSELLDVHAWLGR"
        "FRRDWNSSSVDKLAAALE"
    )

    seqID = simpleSeqID()

    x, y = seqID.getPercent(inputALNseq, inputTARseq, fullTARseq)

    # Formatted so the output is "x y" under both Python 2 and Python 3
    print("%s %s" % (x, y))