################################################################################################### # # MULTI DOMAIN ASSEMBLER for Chimera v1.10 and up # by Sam Hertig, samuel.hertig@ucsf.edu # # Scroll down to the functions "mda" and "main" to get an idea of how the code works. # ################################################################################################### def OpenReplyLog(): from chimera import dialogs, tkgui dialogs.display(tkgui._ReplyDialog.name) def getUniprotID(pdbid, chain): from chimera.fetch import default_fetch_directory dbpath = default_fetch_directory() from os.path import expanduser, join dbpath = expanduser(dbpath) filename = join(dbpath, 'MDA', 'MDA_uniprot') import shelve from SeqAnnotations import uniprotIDs as uIDs shelf = shelve.open(filename, 'c') key = pdbid + chain try: uniprotids = shelf[key] except KeyError: try: uniprotids = uIDs(pdbid, chain) except: print 'no uniprot id found, assigning dummy id for ', pdbid, chain uniprotids = set(['-----']) shelf[key] = uniprotids shelf.close() return uniprotids def dot2Bars(structures): for s in structures: newseq = s.sequence.replace('.', '-') s.sequence = newseq def lowerMuts(structures): for s in structures: for i, aa in enumerate(s.sequence): if aa != structures[0].sequence[i] and aa != '-': s.sequence = s.sequence[:i] + aa.lower() + s.sequence[i+1:] def limitstructs(structures, limit, keepPDB, skipPDB, maxgapsize = 20): """ Custom winnowing: limits the number of structures imported but retains at least n structures per residues, where n=limit. """ if limit == 0: limit = len(structures) + 1 # parse keepPDB and skipPDB user input requiredPDBs = keepPDB.upper().replace(".PDB", "").split(",") undesiredPDBs = skipPDB.upper().replace(".PDB", "").split(",") # sort structures by percentId, then by BLAST score, then by length from operator import itemgetter, attrgetter temp1 = sorted(structures[1:], key=attrgetter('useqlength'), reverse=True) temp2 = sorted(temp1, key=attrgetter('score'), reverse=True) idsortedstructs = sorted(temp2, key=attrgetter('percentid'), reverse=True) # handle query sequence query = structures[0] # create dictionary that indicates the number of templates (value) for every residue (key) coveragedict = {} for i in range(query.uend): coveragedict[i] = 0 # selects structures to keep: selectedstructs = [] for s in idsortedstructs: skip = True if s.pdb in requiredPDBs: selectedstructs.append(s) continue if s.pdb in undesiredPDBs: continue newlycovered = 0 for i in range(s.ustart, s.uend): if coveragedict[i] < limit: newlycovered += 1 if newlycovered > maxgapsize: skip = False if not skip: for i in range(s.ustart, s.uend): coveragedict[i] += 1 selectedstructs.append(s) # reconstruct list of structures and return selectedstructs.append(query) temp = reversed(selectedstructs) structures = list(temp) return structures def elimAllGapColumns(structures): myseqs = [] for s in structures: myseqs.append(s.sequence) # get rid of all gap columns origlen = len(myseqs[0]) for i, col in enumerate(reversed(zip(*myseqs))): if col.count('-') == len(col): for j, seq in enumerate(myseqs): x = origlen - i - 1 myseqs[j] = seq[0:x] + seq[x+1:] # update mymatches for k, s in enumerate(structures): s.sequence = myseqs[k] def closeModels(availablemodels): import chimera chimera.openModels.close(availablemodels) availablemodels[:] = [] def processBlast(results, uid, path = '.', minscore = 50, includeNative = True, suppressdoubles = False, percentId = 0, winnow = '0', limit = 0, keepPDB = '', skipPDB = '', excludeSelected = False, deleteHidden = True, hideSubmodels = True, noConfirm = False): """ Parses BLAST results, filters results, writes output .txt and .fasta files, loads models. """ import chimera from blastpdb.ParserBlastP import Parser from chimera.replyobj import status # parse Blast try: parser = Parser("MDAquery", results, False) except SyntaxError, v: from chimera import replyobj replyobj.error("BLAST error: %s\n" % str(v)) # prepare the first match, which is always the query sequence structures=[] firstmatch = parser.matches[0] s = Structure('QUER', 'Y', firstmatch.sequence, 'SCO', firstmatch.qStart, firstmatch.qEnd) structures.append(s) # filtering and structure initialization chainscoredict = {} pdblist = [] for m in parser.matches[1:]: currentpdb = str(m.pdb[:4]) currentchain = str(m.pdb[5]) if m.score >= minscore or ( includeNative and uid in getUniprotID(currentpdb, currentchain) ): if (currentpdb, m.qStart, m.qEnd) not in chainscoredict: if suppressdoubles: if currentpdb in pdblist: continue # only add the match if its not a duplicate result which happens if the matching pdb has two or more identical chains s = Structure(currentpdb, currentchain, m.sequence, m.score, m.qStart, m.qEnd) structures.append(s) chainscoredict[(currentpdb, m.qStart, m.qEnd)] = s if suppressdoubles: pdblist.append(currentpdb) elif currentchain != chainscoredict[(currentpdb, m.qStart, m.qEnd)].chain: # add this identical chain into the altchains list attribute of the resp. structure chainscoredict[(currentpdb, m.qStart, m.qEnd)].altchains.add(currentchain) # convert dots to bars due to new blast parser for version 2.2.29 dot2Bars(structures) # make mutated residues lower case (used to be automatic in old blast version) lowerMuts(structures) # get rid of all gap columns elimAllGapColumns(structures) # process query q = structures[0] q.findBounds() q.percentid = 100.00 querysequence = q.sequence # filter structures by percent id fstructures = [] fstructures.append(q) for s in structures[1:]: s.findBounds() # make sure that the boundaries are updated (start and end residues...) s.calcPercentId(querysequence) if s.percentid >= percentId: fstructures.append(s) else: print 'Left out %s since the percent identity is only %f (threshold: %f).' % (s.pdb, s.percentid, percentId) structures = fstructures # sort matches by starting residue number, then by reverse ending residue number: def sortstructs(structures): from operator import itemgetter, attrgetter sortedstructs = sorted(structures, key=attrgetter('end'), reverse=True) structures = sorted(sortedstructs, key=attrgetter('start')) return structures structures = sortstructs(structures) # get rid of all gap columns elimAllGapColumns(structures) # update bounds: for s in structures: s.findBounds() # parse user input for limit and maxgapsize from chimera import UserError origlimit = limit try: limitlist = limit.split(",") limit = int(limitlist[0]) if len(limitlist) == 2: maxgapsize = int(limitlist[1]) elif len(limitlist) == 1: maxgapsize = 20 else: raise UserError("Unrecognized limit input: %s" % origlimit) except: raise UserError("Unrecognized limit input: %s" % origlimit) # limit number of structures structures = limitstructs(structures, limit, keepPDB, skipPDB, maxgapsize) structures = sortstructs(structures) # get rid of all gap columns elimAllGapColumns(structures) # update bounds: for s in structures: s.findBounds() # initialize output file from time import strftime from OpenSave import osOpen from os.path import join filename = join(path, 'MDA_output_%s.txt' % uid) fout = osOpen(filename, 'w') fout.write('Output from MultiDomainAssembler generated at ' + strftime("%Y-%m-%d %H:%M:%S") + '. \n') if winnow != '0': fout.write( 'Blast winnowing: %s (max. nr. of hits per query region).\n' % winnow ) print 'Blast winnowing: %s (max. nr. of hits per query region).' % winnow fout.write( 'A total of %d Blast hits were found.\n' % (int(len(parser.matches)) - 1) ) print 'A total of %d Blast hits were found.' % (int(len(parser.matches)) - 1) if includeNative: fout.write('Including all native BLAST results. Non-native results filtered by a minimum score of %d.\n' % (minscore)) print 'Including all native BLAST results. Non-native results filtered by a minimum score of %d.' % (minscore) else: fout.write('All BLAST results filtered by a minimum score of %d.\n' % (minscore)) print 'All BLAST results filtered by a minimum score of %d.' % (minscore) if suppressdoubles: fout.write('Only including the best hit per pdb.\n') print 'Only including the best hit per pdb.' else: fout.write('Allowing multiple hits per pdb.\n') print 'Allowing multiple hits per pdb.' fout.write('Percent ID threshold: %3.2f%%.\n' % (percentId)) print 'Percent ID threshold: %3.2f%%.' % (percentId) if skipPDB: fout.write('Skipping the PDB files %s if found by BLAST.\n' % (skipPDB.upper().replace(".PDB", "").split(","))) print 'Skipping the PDB files %s if found by BLAST.' % (skipPDB.upper().replace(".PDB", "").split(",")) if limit != 0: fout.write('Limiting hits: allowing %d structures per residue in target sequence (allowing gaps of max. %d residues). \n' % (limit, maxgapsize)) print 'Limiting hits: allowing %d structures per residue in target sequence (allowing gaps of max. %d residues).' % (limit, maxgapsize) if keepPDB: fout.write('Retaining the PDB files %s if found by BLAST.\n' % (keepPDB.upper().replace(".PDB", "").split(","))) print 'Retaining the PDB files %s if found by BLAST.' % (keepPDB.upper().replace(".PDB", "").split(",")) nrofstructs = len(structures)-1 fout.write('Number of structures to be imported: %d.\n' % nrofstructs) print 'Number of structures to be imported: %d.' % nrofstructs if nrofstructs == 0: fout.write('No structures passed the filtering, aborting mda.') print 'No structures passed the filtering, aborting mda.' raise chimera.CancelOperation # this error will be caught in main() if excludeSelected: fout.write('(Molecules selected in Chimera were deleted, see corresponding output.fa file.) \n') print '(Molecules selected in Chimera were deleted, see corresponding output.fa file..)' # print sequences with new order and res numbering rel. to query for s in structures: fout.write(s.pdb) fout.write(' ') fout.write(s.chain) fout.write(' ') fout.write(str('%4.2f' % (s.percentid)).rjust(7)) fout.write(' ') fout.write('%4s' % (str(s.score))) fout.write(' ') fout.write('%4d' % (s.ustart)) fout.write(' ') fout.write('%4d' % (s.uend)) fout.write(' ') fout.write('%4d' % (s.start)) fout.write(' ') fout.write('%4d' % (s.end)) fout.write(' ') fout.write(s.sequence) fout.write('\n') fout.write('\n \n') # make new list of all available models that are already open to keep track of unneeded models availablemodels = list(chimera.openModels.list(modelTypes=[chimera.Molecule])) #store selected models in selmolset if excludeSelected: from chimera import selection selectedmols = selection.currentMolecules() selmolset = set() for m in selectedmols: try: selmolset.add((str(m.name), m._mda_ustart)) except AttributeError: print "A model has been selected that was not previously imported by MDA, it will be deleted anyway." # check which of these models got modified (i.e. atom number changed) by the user or MDA, close them modifiedmodels = [] for m in availablemodels: try: if len(m.atoms) != m._mda_nrofatoms: print "Model %d (%s) has been modified in a previous MDA run, thus reloading that pdb file." % (m.id, str(m.name)) modifiedmodels.append(m) except AttributeError: modifiedmodels.append(m) for m in modifiedmodels: availablemodels.remove(m) closeModels(modifiedmodels) # warn user if a large number of models are about to be opened if nrofstructs > 10 and not noConfirm: #the limit depends on the patience of the user and the available RAM from chimera.baseDialog import AskYesNoDialog dlg = AskYesNoDialog("Are you sure you want to import %d pdb files?" % nrofstructs, default="Yes") if dlg.run(chimera.tkgui.app) == "no": raise chimera.CancelOperation # this error will be caught in main() # load models openedstructs = [] openedstructs.append(q) from chimera import tasks task = tasks.Task("Opening MDA models...", modal=True) try: for i,s in enumerate(structures[1:]): task.updateStatus('Opening %d of %d' % (i+1, nrofstructs)) s.getId() s.openModels(availablemodels, excludeSelected, deleteHidden, hideSubmodels) if s.models == None: # this only happens if the user presses the stop button while opening that particular model raise chimera.CancelOperation # this error will be caught in main() try: s.seqStructMapping() except ValueError: print 'WARNING: Not able to process sequence-structure association for', s.pdb, ', thus excluding this model.' closeModels(s.models) continue s.truncateSequence() s.findLigands() openedstructs.append(s) #print 'The structure', s.pdb, 'has the ligands', s.ligands, 'with a resp. length of', s.liglengths, 'residues.' finally: task.finished() structures = openedstructs # delete structures whose models have selected residues in case excludeSelected=true if excludeSelected: notselectedstructs = [] notselectedstructs.append(q) # the following two lines don't work, since some previously selected models might have been closed and reopened, thus loosing the selection: #from chimera import selection #selectedmols = selection.currentMolecules() # instead we use selmolset, see above: for s in structures[1:]: for m in s.models: if (str(m.name), m._mda_ustart) in selmolset: print "WARNING: Excluding %d: %s since it was selected by the user." % (m.id, s.pdb) closeModels(s.models) break elif m == s.models[-1]: notselectedstructs.append(s) structures = notselectedstructs if len(structures)-1 == 0: print 'Too many structures were selected for deletion, aborting mda.' raise chimera.CancelOperation # this error will be caught in main() # write FASTA alignment file writeFasta(structures, path, uid) # if we still have open models, we need to close these unneeded ones if len(availablemodels) > 0: closeModels(availablemodels) # get rid of all gap columns elimAllGapColumns(structures) # inform the user about structures that have missing coordinates: truncated = [s for s in structures if s.truncated] missing = [s for s in structures if s.missing] if truncated: fout.write('The following models have missing coordinates at the termini and thus \ntheir sequences were truncated in the alignment file:\n') print 'The following models have missing coordinates at the termini and thus their sequences were truncated in the alignment file:' for s in truncated: fout.write( '%s %s \n' % (s.models[0].id, s.pdb) ) print '%s %s' % (s.models[0].id, s.pdb) if missing: fout.write('\nThe following models have missing coordinates somewhere in the middle \nof their sequence and might thus not be ideal templates for Modeller:\n') print 'The following models have missing coordinates somewhere in the middle of their sequence and might thus not be ideal templates for Modeller:' for s in missing: fout.write( '%s %s \n' % (s.models[0].id, s.pdb) ) print '%s %s' % (s.models[0].id, s.pdb) # close output file and return results fout.close() return structures def destroyGroups(): """ Destroys all groups in model panel. """ from ModelPanel.base import groupCmd, _mp from ModelPanel.Group import Group if _mp is None: # Model Panel has not been created yet return foundagroup=True while foundagroup: items = _mp.items[:] foundagroup=False for i in items: if isinstance(i, Group): groupCmd([i]) foundagroup=True def groupSubModels(): """ Groups submodels in model panel. Normally this would happen automatically with "chimera.openModels.open", but we have set "checkForChanges" to "False"! """ import chimera from ModelPanel.base import groupCmd#, _mp openmodels = list(chimera.openModels.list(modelTypes=[chimera.Molecule])) allids = set([m.id for m in openmodels]) for currentid in allids: sameidmodels = [m for m in openmodels if m.id == currentid] if len(sameidmodels) > 1: groupCmd(sameidmodels, sameidmodels[0].name) def writeFasta(structures, path, uid): from OpenSave import osOpen from os.path import join filename = join(path, 'MDA_output_%s.fa' % uid) fout = osOpen(filename, 'w') query = structures[0] fout.write('> %s %d aa\n' % (uid, query.uend-query.ustart) ) fout.write(query.sequence+'\n') nrofhits = len(structures) - 1 i = 0 for s in structures[1:]: fout.write( '>%s %s %%ID %d \n' % (s.models[0].id, s.pdb, s.percentid) ) fout.write(s.sequence+'\n') i += 1 fout.close() class Structure: """ The main datastructure for open models. """ def __init__(self, pdb, chain, seq, score, start, end): self.pdb = pdb # a string self.chain = chain # the chain that was aligned in the BLAST self.sequence = seq # the sequence of the aligned chain self.seqlength = end - start + 1 # length of that sequence (can contain gaps within!) self.useqlength = None # ungapped sequence length, assigned by the truncateSequence() method self.score = score # BLAST score self.uniprot = None # string, uniprot ID of resp. pdb file self.models = [] # a list of model objects in chimera self.percentid = None # a custom % identity instead of the BLAST score self.template = False # is true if the sequence should be used as a template for match self.ligands = [] # a list of chain IDs that are ligands, i.e. have different uniprot IDs than self.uniprot self.liglengths = [] # see self.findLigands() self.overlap = None # nr. of residues that overlap with the previous structure (needed for match) self.ustart = start # number of the 1st aligned residue resp. to UniProt(no gaps) self.uend = end # number of the last aligned residue resp. to UniProt (no gaps) self.start = None # starting residue nr. resp. to query sequence (might have gaps) self.end = None # ending residue nr. resp. to query sequence (might have gaps) self.altchains = set() # other chains of the pdb that aligned equally well in Blast as self.chain self.seqobj = None # a sequence object of self.sequence, assigned in percentid method self.structseqobj = None # a sequence object of the actual sequence in the pdb, might have missing or extra res. self.matchmap = {} # a dictionary relating indices from self.sequence to self.structseqobj self.blastresind = [] # a list of indices where the matchmap actually points to an existing residue self.blastresinst = [] # a list of instances of residues in the matchmap self.covering = None # see hideSimilar() self.ishiddenunderneath = None # see hideSimilar() self.yshift = None # see arrangeModelsY() and rearrangeHidden() self.row = 1 # needed for y-placement, see arrangeModelsY() self.truncated = False # see truncateSequence()... self.missing = False # see truncateSequence()... self.Nmissingres = 0 # Nr. of residues that were deleted near N-terminus due to lacking coordinates def findBounds(self): totseqlength= len(self.sequence) #actually corresponds to totseqlength since it contains '-' for gaps! # find start for i in range(totseqlength): if self.sequence[i] != '-' : self.start = i break # find end for i in range(totseqlength): i = totseqlength - i - 1 if self.sequence[i] != '-' : self.end = i break def openModels(self, availablemodels, excludeSelected, deleteHidden, hideSubmodels): """ Only opens a new model if no open model with the same pdb ID is found. In case of excludeSelected=True, it needs to find a model previously opened with MDA and aligned at the same position. """ import chimera # check whether the model is already open samenamemodels = [m for m in availablemodels if m.name == self.pdb] # if selected models are to be deleted, we have to make sure we assign the same models to the BLAST hits that we had previously if excludeSelected: temp = [] for m in samenamemodels: try: if m._mda_ustart == self.ustart: temp.append(m) except AttributeError: print "Model %s doesn't seem to belong to any MDA structure previously, thus it will be deleted and reloaded." % str(m.id) samenamemodels = temp # if its empty open a new one if not samenamemodels: try: if deleteHidden and hideSubmodels: # this is much faster than loading all submodels and deleting them later #self.models = chimera._openPDBIDModel(self.pdb, explodeNMR=False) # alternative way of loading 1st model, but crashes for e.g. 2MFN.pdb self.models = chimera._openPDBIDModel(self.pdb)[:1] chimera.openModels.add(self.models) else: # traditional way of opening all submodels self.models = chimera.openModels.open(self.pdb, type="PDBID", checkForChanges=False) except: raise print 'No model exists for %s' % self.pdb self.models = [] # store number of atoms as an attribute of each model, see the creation of the list of availablemodels above... for m in self.models: m._mda_nrofatoms = len(m.atoms) # the prefix _mda_ hides this attributes from the attribute gathering process in the render by attribute tool m._mda_ustart = self.ustart # the prefix _mda_ hides this attributes from the attribute gathering process in the render by attribute tool else: currentid = samenamemodels[0].id for m in samenamemodels: if m.id == currentid: m._mda_ustart = self.ustart # the prefix _mda_ hides this attributes from the attribute gathering process in the render by attribute tool self.models.append(m) availablemodels.remove(m) def getId(self): import chimera uniprotset = getUniprotID(self.pdb, self.chain) self.uniprot = uniprotset.pop() # assume the set only contains one ID def calcPercentId(self, querysequence): from chimera.Sequence import percentIdentity, Sequence self.seqobj = Sequence() queryseqobj = Sequence() self.seqobj[:] = self.sequence queryseqobj[:] = querysequence self.percentid = percentIdentity(self.seqobj, queryseqobj) def findLigands(self): """ Finds chains in pdb that were not part of the BLAST results but in the same file (most likely complexed). """ import chimera from Measure import inertia for m in self.models[0:1]: #assume that the other models in an nmr bundle are the same as the first one... chainIDs = set([str(r.id.chainId) for r in m.residues]) #make a set to avoid double entries # the str() in the line above is essential on linux machines, since shelve uses dumbdbm there which needs strings and not unicodes! if len(chainIDs) > 1: # remove the ID which we are assembling chainIDs.remove(self.chain) # create dictionary mapping from uniprot ids to chains ligands = {} chainIDs2 = set() for c in chainIDs: id = (getUniprotID(self.pdb, c)).pop() if id != self.uniprot: chainIDs2.add(id) if id not in ligands: ligands[id] = [c] else: ligands[id].append(c) # find receptor atom coordinates: recatomlist = [r.findAtom('CA') for r in m.residues if (r.findAtom('CA') and r.id.chainId == self.chain)] axes, moments, reccenter = inertia.atoms_inertia(recatomlist) # loop over ligand uniprot IDs for id in chainIDs2: chains = ligands[id] mindistance = 1000000 # loop over chain letters with the same uniprot ID for c in chains: # find ligand atom coordinates: ligatomlist = [chimera.misc.principalAtom(r) for r in m.residues if r.id.chainId == c and chimera.misc.principalAtom(r)] if len(ligatomlist) ==0: #ugly but works... ligatomlist = [r.atoms[0] for r in m.residues if r.id.chainId == c and r.atoms] axes, moments, ligcenter = inertia.atoms_inertia(ligatomlist) diff = reccenter - ligcenter distance = ( (diff[0])**2 + (diff[1])**2 + (diff[2])**2 )**(0.5) if distance < mindistance: ligandchain = c ligandlength = len(ligatomlist) mindistance = distance self.ligands.append(ligandchain) self.liglengths.append(ligandlength) def seqStructMapping(self): """ Establishes a map linking sequence residues to model residues. Raises a ValueError if sthg. goes wrong. """ threshold = 10 errortolerance = 0.1 from chimera.Sequence import Sequence from MultAlignViewer.structAssoc import tryAssoc, estimateAssocParams origchain = self.chain allchains = (self.altchains | set(self.chain)) for chain in allchains: # match the s.sequence residue numbers to pdb residue numbers try: curstructseqobj = self.models[0].sequence(chain) except KeyError: print 'Chain %s of structure %s has been deleted in a previous MDA run or by the user, thus keeping chain %s as the structure-associated chain.' % (chain, self.pdb, self.chain) continue estlen, segments, gaps = estimateAssocParams(curstructseqobj) try: curmatchmap, errors = tryAssoc(self.seqobj, curstructseqobj, segments, gaps, estlen, maxErrors=errortolerance*estlen) except ValueError: print 'Error while determining seqstruct matchmap residues for ', self.pdb, chain, '.' raise if len(curmatchmap) > len(self.matchmap): self.matchmap = curmatchmap self.chain = chain self.structseqobj = curstructseqobj if origchain != self.chain: print 'Found a better structure-sequence association for chain ', self.chain, ' in model ', self.pdb, '.' print '(Compared to the original chain', origchain, 'found by BLAST.)' if len(self.matchmap) < threshold: print 'Bad structure-sequence association for chain ', self.chain, ' in model ', self.pdb, '.' #raise ValueError # store residue indices of matchmap that exist: for i,char in enumerate(self.seqobj.ungapped()): try: r = self.matchmap[i] except KeyError: continue self.blastresind.append(i) self.blastresinst.append(r) # in rare cases, none of the coordinates of the residues that aligned in BLAST will be in the structure if self.blastresind == []: print "None of the coordinates of the residues that aligned in BLAST are in the structure of %s." % self.pdb raise ValueError def truncateSequence(self): """ Truncates the sequence of structures if the termini have residues with missing coordinates """ useq = self.seqobj.ungapped() useqlength = len(useq) # find missing residues: missingres = [] for i in range(useqlength): if i not in self.blastresind: missingres.append(i) if len(missingres) > 0: # categorize into res. missing at the C-terminus and N-terminus Nmissingres = [i for j,i in enumerate(missingres) if i-j == 0] if missingres[-1] == useqlength - 1: lastdiff = missingres[-1] - len(missingres) + 1 Cmissingres = [ i for j,i in enumerate(missingres) if i-j == lastdiff ] else: Cmissingres = [] # edit sequence: truncseq = self.sequence if len(Nmissingres) > 0: print "Structure %s is missing coordinates for the following N-terminal residues: %s" % (self.pdb, str(Nmissingres)) i = self.start j = i while i in range(self.start, self.start+len(Nmissingres)): if truncseq[j] != '-': i += 1 truncseq = truncseq[0:j] + '-' + truncseq[j+1:] j += 1 self.sequence = truncseq self.findBounds() self.ustart = self.ustart + len(Nmissingres) # mark as truncated: self.truncated = True if len(Cmissingres) > 0: print "Structure %s is missing coordinates for the following C-terminal residues: %s" % (self.pdb, str(Cmissingres)) i = 0 for pos, char in reversed(list(enumerate(self.sequence))): if char != '-': i += 1 truncseq = truncseq[0:pos] + '-' + truncseq[pos+1:] if i == len(Cmissingres): break self.sequence = truncseq self.findBounds() self.uend = self.uend - len(Cmissingres) # mark as truncated: self.truncated = True # mark if there are missing coordinates inbetween N and C: if len(missingres) > len(Nmissingres) + len(Cmissingres): self.missing = True # store nr of N-terminal missing res. self.Nmissingres = len(Nmissingres) # store actual sequence length self.useqlength = useqlength - len(missingres) def reduceModels(Structures, hideSubmodels = True, hideComplex = False, hideAltChain = True, deleteHidden = True): """ Hides or deletes unwanted models/chains. """ import chimera from Midas import unmodeldisplay, undisplay, unribbon # hide or delete all subids after the first one modelstobeclosed=[] for s in Structures[1:]: for m in reversed(s.models): if m.subid > 1: if hideSubmodels: if deleteHidden: s.models.remove(m) # has to be removed from the modellist since otherwise underlying stuff is missing when referenced later modelstobeclosed.append(m) # select for deletion print 'Deleting model %s.' % str(m) else: unmodeldisplay(str(m)) print 'Hiding model %s.' % str(m) else: chainIDs = set([r.id.chainId for r in m.residues]) if len(chainIDs) > 1: # delete or hide all but one chain that was retrieved by BLAST earlier chainIDs.remove(s.chain) # make sure the main chain doesn't get hidden for c in chainIDs: identifier = str(m) + ':.' + c if c not in s.ligands and hideAltChain: if deleteHidden: print 'Deleting alternate chain %s of model %s.' % (c, str(m)) chimera.runCommand('delete %s' % identifier) #better option: use chimera.deleteAtom or similar... else: print 'Hiding alternate chain %s of model %s.' % (c, str(m)) undisplay(identifier) # might not be necessary... unribbon(identifier) elif c in s.ligands and hideComplex: if deleteHidden: print 'Deleting complexed chain %s of model %s.' % (c, str(m)) chimera.runCommand('delete %s' % identifier) #better option: use chimera.deleteAtom or similar... else: print 'Hiding complexed chain %s of model %s.' % (c, str(m)) undisplay(identifier) # might not be necessary... unribbon(identifier) # close doomed models: if hideSubmodels: chimera.openModels.close(modelstobeclosed) # delete ions if desired: if hideComplex: if deleteHidden: chimera.runCommand('delete ions') else: chimera.runCommand('~show ions') class Box: """ Within a box, there should be enough overlap to align models using the match command. """ def __init__(self, number): self.nr = number self.structures = [] def __str__(self): return 'Box number %2d contains %3d structures' % (self.nr, len(self.structures)) def appendStructure(self, s): self.structures.append(s) def findtemplate(self, i): if self.structures[i].template: return self.structures[i] else: if i > 0: return self.findtemplate(i-1) else: #print 'No template structure for matching found.' return self.structures[0] class Gap: """ Handles gaps (where no model was found) and draws spheres proportional to the number of missing residues. """ def __init__(self, aa, x): self.length = aa # length in residues # (1.21 x MW) Angstrom^3 / molecule, see www.basic.northwestern.edu/biotools/proteincalc.html#vol # assume 110 Da per aa -> V = 133 * aa = 4/3 Pi r^3 self.radius = 3.17*(float(aa))**(1./3) # radius of the sphere self.minx = x def __str__(self): x = self.minx + self.radius return 'This gap has a length of %d amino acids, represented by a sphere at x = %g with radius r = %g.' % (self.length, x, self.radius) def drawsphere(self, sphereID): import chimera from Shape import shapecmd xpos = self.minx + self.radius zpos = 0 ypos = 0 spherename = 'Gap of %d residues' % self.length sphere = shapecmd.sphere_shape(radius = self.radius, center = '%f, %f, %f' % (xpos, ypos, zpos), rotation = None, qrotation = None, coordinateSystem = None, divisions = 72, color = (0.663,0.663,0.663,0.5), mesh = False, linewidth = 1, slab = None, modelName = spherename, modelId = sphereID) sphere = chimera.openModels.list()[-1] # the above line does not return a model, so I need this line... CAUTION; only works if sphereID is high enough # the coordinate are relative to the first open model, so transform the sphere with the inverse of the first model's xform model0 = chimera.openModels.list(modelTypes=[chimera.Molecule])[0] model0pos = model0.openState.xform invmodel0pos = chimera.Xform.inverse(model0pos) spherepos = sphere.openState.xform spherepos.multiply(invmodel0pos) sphere.openState.xform = spherepos def diameter(self): return 2*(self.radius) def createBoxes(Structures): """ Groups structures into boxes. Within a box, there should be enough overlap to align models using the match command. """ minoverlap = 10 # minimum overlap of two sequences such that they will be used with match prevend = 0 prevxslot = 0 prevseq = [] Boxes = [] for s in Structures[1:]: # calculate length of aligning sequence, which will be used later in hideSimilar() s.length = s.end - s.start curseq = s.sequence if prevseq != []: overlapaalist = [caa for caa, paa in zip(curseq[s.start:prevend], prevseq[s.start:prevend]) if (caa != '-' and paa != '-')] curoverlap=len(overlapaalist) else: curoverlap = -1 s.overlap = curoverlap if curoverlap >= minoverlap or s.length <= minoverlap: curxslot = prevxslot else: curxslot = prevxslot + 1 b = Box(curxslot) Boxes.append(b) b.appendStructure(s) prevxslot = curxslot if s.end >= prevend: prevseq = curseq prevend = s.end s.template = True # will be used for match return Boxes def hideSimilar(Boxes, uid, group): """ Hides models that are considered to be in the same group (only if group=True). """ # if no grouping desired, all structures must be labeled as covering structures if not group: for b in Boxes: for s in b.structures: s.covering = True return # otherwise, continue to grouping procedure: print "Hiding structures that are similar to each other..." overhang = 25 # number of aa that can differ from the ends of two structures where one will be hidden showligands = False # if true, an extra group for structures with ligands will be created import chimera from Midas import unmodeldisplay for b in Boxes: # classify structures as native/foreign and ligands n_structs = [] f_structs = [] n_noligstructs = [] n_ligstructs = [] f_noligstructs = [] f_ligstructs = [] for s in b.structures: if s.uniprot == uid and s.ligands == []: n_structs.append(s) n_noligstructs.append(s) elif s.uniprot == uid and s.ligands != []: n_structs.append(s) n_ligstructs.append(s) elif s.ligands == []: f_structs.append(s) f_noligstructs.append(s) else: f_structs.append(s) f_ligstructs.append(s) def classviz(allstructures): ''' Classifies structures into covering and covered structures, hides covered ones. Covering structures are the longest ones in their subbox and slightly different from s.template. ''' # find covering structures: prevend = -overhang prevcoverstruct = None coveringstructs = [] for s in allstructures: if prevend + overhang < s.end: # check whether the starting residue of the previous covering structure isn't too far back if prevcoverstruct and prevcoverstruct.start + overhang > s.start: prevcoverstruct.covering = False # then the previous covering structure doesn't need to be one! coveringstructs.remove(prevcoverstruct) print prevcoverstruct.pdb, prevcoverstruct.models[0].id, 'is not covering other structures, instead:' s.covering = True coveringstructs.append(s) prevend = s.end prevcoverstruct = s #print s.pdb, s.models[0].id, 'is covering other structures.' # find covered structures and hide them for s in allstructures: if not s.covering: for c in coveringstructs: if s.end < c.end + overhang: s.ishiddenunderneath = c #print s.pdb, s.models[0].id, "is hidden underneath", c.pdb, c.models[0].id for m in s.models: unmodeldisplay(str(m)) break # the fist covering structure is the one we want return coveringstructs def groupmodels(allstructures, coveringstructures): ''' Make groups in model panel. ''' from ModelPanel.base import groupCmd, getGroupOf showModelPanel() # Have to create model panel for group for c in coveringstructures: itemstogroup = [] # add cover structure to groupable items modelzero = c.models[0] item = getGroupOf(modelzero) if not item: for m in c.models: itemstogroup.append(m) # m is a model else: itemstogroup.append(item) # item is a group # add to be hidden structures to groupable items for s in allstructures: if s.ishiddenunderneath == c: modelzero = s.models[0] item = getGroupOf(modelzero) if not item: for m in s.models: itemstogroup.append(m) # m is a model else: itemstogroup.append(item) # item is a group # cannot make a group if only one item groupsize = len(itemstogroup) if groupsize <= 1: continue # name of group that will appear in model panel name = '%s and %d similar' % (c.pdb, groupsize-1) # make group mygroup = groupCmd(itemstogroup, name) # execute for all desired lists if showligands: coveringstructs = classviz(n_noligstructs) groupmodels(n_noligstructs, coveringstructs) coveringstructs = classviz(n_ligstructs) groupmodels(n_ligstructs, coveringstructs) coveringstructs = classviz(f_noligstructs) groupmodels(f_noligstructs, coveringstructs) coveringstructs = classviz(f_ligstructs) groupmodels(f_ligstructs, coveringstructs) else: coveringstructs = classviz(n_structs) groupmodels(n_structs, coveringstructs) coveringstructs = classviz(f_structs) groupmodels(f_structs, coveringstructs) def showModelPanel(): from chimera import dialogs import ModelPanel dialogs.display(ModelPanel.ModelPanel.name) def closeSpheres(): import chimera tocloselist = [m for m in chimera.openModels.list() if 'Gap of' in m.name] chimera.openModels.close(tocloselist) def arrangeModelsX(Boxes, Structures, uid, totseqlength): """ Lines up models along X-axis according to their position in the BLAST alignment. """ xpadding = 0 # adjust for tuning x-placement (in Angstrom) Gaps = [] import chimera, Matrix from Measure import inertia from numpy import float32 # call match within boxes for b in Boxes: nrofstructs = len(b.structures) if nrofstructs > 1: # only prepare matchmaking if we have at least two structures for i in range(nrofstructs-1): # loop over structure-pairs in the box # for j = 0, use match curstruct = b.structures[i+1] curmodelzero = curstruct.models[0] curnrofsubmodels = len(curstruct.models) # use a function to check whether the ith structure is suitable or whether we need to go further back tempstruct = b.findtemplate(i) tempmodelzero = tempstruct.models[0] # find atoms that align and are not gaps (atomlists have to be the same length for current structure and template!) tempstructatoms = [] curstructatoms = [] temprescounter = tempstruct.Nmissingres-1 currescounter = curstruct.Nmissingres-1 for (tempres, curres) in zip(tempstruct.sequence, curstruct.sequence): if tempres != '-': temprescounter +=1 if curres != '-': currescounter +=1 if tempres != '-' and curres != '-': try: curresobj = curstruct.matchmap[currescounter] except KeyError: continue try: tempresobj = tempstruct.matchmap[temprescounter] except KeyError: continue tempatom = tempresobj.findAtom("CA") curatom = curresobj.findAtom("CA") if tempatom is not None and curatom is not None: tempstructatoms.append(tempatom) curstructatoms.append(curatom) # check whether we have enough atoms to call match lent = len(tempstructatoms) lenc = len(curstructatoms) if lent < 3 or lenc < 3 or lenc != lent: print 'Could not align %s to %s since not enough residues were found to call match directly. Trying matchmaker...' % (curstruct.pdb, tempstruct.pdb) # use matchmaker... #atomspec1 = str(tempmodelzero) + ':.' + tempstruct.chain #atomspec2 = str(curmodelzero) + ':.' + curstruct.chain #chimera.runCommand("mmaker %s %s ss false iterate false pairing ss" % (atomspec1, atomspec2)) chimera.runCommand("mmaker %s %s" % (str(tempmodelzero), str(curmodelzero))) # store position after alignment curpos = curmodelzero.openState.xform # for j > 0, use the stored position of the 0th model for j in range(curnrofsubmodels-1): curmodel = curstruct.models[j+1] curmodel.openState.xform = curpos else: # call match only if it will work properly from chimera import match xform, rmsd = match.matchAtoms(tempstructatoms, curstructatoms) xf = tempmodelzero.openState.xform xf.multiply(xform) curmodelzero.openState.xform = xf chimera.replyobj.info("RMSD between %s and %s over %d residues is %.3f angstroms\n" % (tempstruct.pdb, curstruct.pdb, lenc, rmsd)) # for j > 0, use the stored position of the 0th model for j in range(curnrofsubmodels-1): curmodel = curstruct.models[j+1] curmodel.openState.xform = xf # position boxes along the x-axis prevxmax = 0 prevend = -1 for b in Boxes: # find first and last structure firststruct = None laststruct = None i = 0 for i in range(len(b.structures)): if b.structures[i].template and not firststruct: firststruct = b.structures[i] if b.structures[-1-i].template and not laststruct: laststruct = b.structures[-1-i] if not firststruct: firststruct = b.structures[0] if not laststruct: laststruct = b.structures[-1] # find coordinates of N and C terminus firstmodel = firststruct.models[0] lastmodel = laststruct.models[0] # finding the N terminus: i = 0 while True: Nind = firststruct.blastresind[i] resN = firststruct.matchmap[Nind] caN = resN.findAtom('CA') if caN: coordN = caN.xformCoord() break i += 1 #print 'An N-terminal residue of %s is missing a C_alpha atom.' % (firststruct.pdb) # finding the C terminus: i = 0 while True: Cind = laststruct.blastresind[-1-i] resC = laststruct.matchmap[Cind] caC = resC.findAtom('CA') if caC: coordC = caC.xformCoord() break i += 1 #print 'A C-terminal residue of %s is missing a C_alpha atom.' % (laststruct.pdb) # define translation to center relevantatoms = [resN.findAtom('CA'), resC.findAtom('CA')] axes, moments, center = inertia.atoms_inertia(relevantatoms) cx, cy, cz = center # in global coordinates cxf = chimera.Xform.translation(-cx, -cy, -cz) # define rotation to orient along x-axis axis = coordC - coordN xN = Matrix.inner_product(axis, coordN) xC = Matrix.inner_product(axis, coordC) correctorientation = (xN < xC) if not correctorientation: axis = -axis rtf = Matrix.vector_rotation_transform(axis.data(), (1, 0, 0)) rxf = Matrix.chimera_xform(rtf) # execute transformations for s in b.structures: for m in s.models: xf = chimera.Xform() xf.premultiply(cxf) xf.premultiply(rxf) xf.multiply(m.openState.xform) m.openState.xform = xf # figure out how much to translate along x # determine length of box xmin, xmax = 0, 0 for s in b.structures: if s.ishiddenunderneath: continue relevantatoms = [r.findAtom('CA') for r in s.models[0].residues if (r.findAtom('CA') and r.id.chainId == s.chain and r in s.blastresinst)] axyz = chimera.numpyArrayFromAtoms(relevantatoms, xformed=True).astype(float32) curxmin, curxmax = axyz[:,0].min(), axyz[:,0].max() if curxmin < xmin: xmin = curxmin if curxmax > xmax: xmax = curxmax xoverlap = prevxmax - xmin # determine gap between current and previous box: curstart = firststruct.start gaplength = curstart - prevend - 1 # store the last residue of this box for the next iteration prevend = laststruct.end # create gap instance if necessary if gaplength <= 0 and prevxmax != 0: gaplength, gapspacing = 0, 0 else: g = Gap(gaplength, prevxmax + xpadding) Gaps.append(g) gapspacing = g.diameter() # set translation xshift = xoverlap + gapspacing + 2*xpadding # store values for next iteration prevxmax = xmax + xshift # define x translation txf = chimera.Xform.translation(xshift, 0, 0) # execute transformations for s in b.structures: for m in s.models: xf = chimera.Xform() xf.premultiply(txf) xf.multiply(m.openState.xform) m.openState.xform = xf # add final gap (from end of Blast alignment to end of UniProt sequence) i = 0 while i >= 0: if Structures[-1-i].template: j = -1-i break i += 1 lastgaplength = (Structures[0].end - Structures[j].end) + (totseqlength - Structures[0].uend) - 1 g = Gap(lastgaplength, prevxmax + xpadding) Gaps.append(g) # finally, draw all spheres to represent gaps sphereID = 1000 # a high model id ensure that the spheres are last in the list of open models... for g in Gaps: if g.radius > 0: sphereID += 1 g.drawsphere(sphereID) return Gaps def arrangeModelsY(Boxes, Gaps, hideComplex): """ Stacks models along Y-axis. """ ypadding = 10 # adjust for tuning y-placement (in Angstrom) import chimera for b in Boxes: # find boundaries def findboundaries(hideComplex, listofstructs): from numpy import float32 for s in listofstructs: if hideComplex: atoms = [r.findAtom('CA') for r in s.models[0].residues if (r.findAtom('CA') and r.id.chainId == s.chain)] else: atoms = [r.findAtom('CA') for r in s.models[0].residues if ( r.findAtom('CA') and ((r.id.chainId == s.chain) or (r.id.chainId in s.ligands)) )] axyz = chimera.numpyArrayFromAtoms(atoms, xformed=True).astype(float32) s.xmin = axyz[:,0].min() s.xmax = axyz[:,0].max() s.ymin = axyz[:,1].min() s.ymax = axyz[:,1].max() #print 'PDB, Xmin, Xmax, Ymin, Ymax', s.pdb, s.xmin, s.xmax, s.ymin, s.ymax findboundaries(hideComplex, b.structures) # assigning y-positioning row def assignrow(row, listofstructs): thresstruct = None newrow = False coveringstructs = [s for s in listofstructs if s.covering] hiddenstructs = [s for s in listofstructs if s.ishiddenunderneath] for s in coveringstructs: if s.row == row: if thresstruct == None: thresstruct = s elif s.xmin < thresstruct.xmax: s.row = row + 1 newrow = True else: s.row = row thresstruct = s for s in hiddenstructs: s.row = s.ishiddenunderneath.row if newrow: assignrow(row+1, listofstructs) assignrow(1, b.structures) # move the center of all structures to y = 0 def yalign(listofstructs): for s in listofstructs: # find y-coordinate of center of structure curcenter = (s.ymax + s.ymin ) / 2. yshift = 0 - curcenter # define y translation tyf = chimera.Xform.translation(0, yshift, 0) # store y-shift if needed later s.yshift = yshift # execute for m in s.models: xf = chimera.Xform() xf.premultiply(tyf) xf.multiply(m.openState.xform) m.openState.xform = xf yalign(b.structures) # update boundaries findboundaries(hideComplex, b.structures) # move covering structures (or all if group is false), "down" controls direction (True: downwards or False: upwards) def movey(row, listofstructs, down = True): shift = False coveringstructs = [s for s in listofstructs if s.covering] ymax = coveringstructs[0].ymin ymin = coveringstructs[0].ymax # find extrema for s in coveringstructs: if s.row == row: if s.ymax > ymax: ymax = s.ymax elif s.row == row + 1: if s.ymin < ymin: ymin = s.ymin # move for s in coveringstructs: if s.row > row: shift = True yoverlap = ymax - ymin yshift = yoverlap + ypadding if down: yshift = -yshift # store additional y-shift if needed later s.yshift += yshift # define y translation tyf = chimera.Xform.translation(0, yshift, 0) # execute for m in s.models: xf = chimera.Xform() xf.premultiply(tyf) xf.multiply(m.openState.xform) m.openState.xform = xf # iterate recursively if shift: movey(row+1, listofstructs, down) movey(1, b.structures, down = True) #move hidden structures (or none if group is false): def rearrangeHidden(listofstructs): import chimera hiddenstructs = [s for s in listofstructs[1:] if s.ishiddenunderneath] for s in hiddenstructs: refstruct = s.ishiddenunderneath # the difference of the two shifts is what we need to translate with yshift = refstruct.yshift - s.yshift tyf = chimera.Xform.translation(0, yshift, 0) for m in s.models: xf = chimera.Xform() xf.premultiply(tyf) xf.multiply(m.openState.xform) m.openState.xform = xf rearrangeHidden(b.structures) def setAttributes(Structures): """ Sets attributes for models and residues. These can be used by the Color-by-attributes-tool in Chimera to set custom colors. """ for s in Structures[1:]: for m in s.models: for r in m.residues: if str(r.id.chainId) not in s.ligands: # only set these attributes for non-ligand chains: r.mda_percentid = s.percentid r.mda_blastscore = s.score for r in s.models[0].residues: if str(r.id.chainId) in s.ligands: r.mda_alignment = 4.0 # ligand res else: r.mda_alignment = 3.0 # blast chain res, should be made transparent for i,char in enumerate(s.seqobj.ungapped()): try: r = s.matchmap[i] except KeyError: continue if i in s.blastresind: if char.islower(): r.mda_alignment = 2.0 # mutation else: r.mda_alignment = 1.0 # part of blast alignment # make sure attributes will get stored in session: import chimera from SimpleSession.save import registerAttribute registerAttribute(chimera.Residue, "mda_percentid") registerAttribute(chimera.Residue, "mda_blastscore") registerAttribute(chimera.Residue, "mda_alignment") def colorModels(Structures, coloring= 'mutations', panelcolor = False): """ Coloring of models, three different options: 'mutations' (default), 'percentid' or 'blastscore' """ import chimera coloring= coloring.lower() # define base colors mutcolor = "1.0,0.0,0.0,1.0" # Red ligcolor = "0.0,0.0,1.0,1.0" # Blue blastcolor = "0.66,0.66,0.66,1.0" # Dark Grey noblastcolor = blastcolor # color icons in model panel: if panelcolor: for s in Structures[1:]: if s.percentid == 100: for m in s.models: m.color = chimera.MaterialColor(*[float(x) for x in blastcolor.split(',')]) else: for m in s.models: m.color = chimera.MaterialColor(*[float(x) for x in mutcolor.split(',')]) # coloring by residue chimera.runCommand( "rangecolor mda_alignment,r 1 %s 2 %s 3 %s 4 %s" % (blastcolor, mutcolor, noblastcolor, ligcolor) ) if coloring in 'percentid' and len(coloring) >= 3: chimera.runCommand( "rangecolor mda_percentid,r min %s max %s" % (mutcolor, blastcolor) ) elif coloring in 'blastscore' and len(coloring) >= 3: chimera.runCommand( "rangecolor mda_blastscore,r min %s max %s" % (mutcolor, blastcolor) ) elif coloring in 'mutations' and len(coloring) >= 3: pass else: print 'Coloring command "%s" not understood, used option "mutations". (Please enter either "mutations", "blastscore" or "percentid".)' % coloring # make residues not aligned in BLAST significantly transparent chimera.runCommand( "transparency 80,r :/mda_alignment=3" ) # color ions like ligands chimera.runCommand( "color %s ions" % ligcolor ) def findMAV(): """ Finds MAV instances. """ from chimera.extension import manager from MultAlignViewer.MAViewer import MAViewer mavs = [inst for inst in manager.instances if isinstance(inst, MAViewer)] return mavs def openMAV(path, uid, structures): """ Imports FASTA file generated in processBlast(), does sequence-structure associations. """ import chimera from os.path import join from MultAlignViewer.MAViewer import MAViewer # open fastafile generated by mda filename = join(path, 'MDA_output_%s.fa' % uid) mav = MAViewer(filename, autoAssociate=False) # loop over structures and make associations i = 0 templateModels = [] for s in structures[1:]: i += 1 # seqs[0] is the query sequence, which we don't want to associate with any model mol = s.models[0] templateModels.append(mol) actualseq = mav.seqs[i] mav.prematchedAssocStructure(actualseq, s.structseqobj, s.matchmap, 0, False, False, False) #slower: mav.associate([mol], seq = actualseq, force=True) return mav, templateModels def openMod(path, uid, mav, templateModels, web=False): """ Opens Modeller window, selects all templates and sets MDA-specific default values for nr. of models, thorough optimization and Output path. """ from os.path import join, exists from os import makedirs # default path for Modeller output models tempPath = join(path, 'Modeller_%s' % uid) if not exists(tempPath): makedirs(tempPath) # function for calling the Modeller GUI def MODgui(): # open Modeller dialogue mav._showModellerHomologyDialog() mod = mav.modellerHomologyDialog # default to only one output model mod.previousnumModel = 1 mod.numModelEntry.setvalue(mod.previousnumModel) # default to thorough optimization being active mod.optimLabel.config(state='active') mod.optimVar.set(1) mod.optimCheckButton.config(state='active') # set Modeller path as specified by MDA mod.pathTempOpt.set(tempPath) # default to select all templates mod.tplTable.select(mod.candidateSeqs) MODgui() def main(results, uid, path, totseqlength, minscore = 50, includeNative = True, suppressdoubles = False, percentId = 0, group = False, hideSubmodels = True, hideComplex = False, hideAltChain = True, deleteHidden = True, coloring= 'mutations', winnow = '0', limit = '0', keepPDB = '', skipPDB = '', excludeSelected = False, showAlignment = True, noConfirm = False): """ Main mda routine, stepwise processing of different tasks: 1) filters BLAST results and opens models (see processBlast()...) 2) deletes or hides unwanted models/chains 3) arranges models in space 4) colors models 5) opens MAV and Modeller """ import time tstart = time.time() # for performance optimization import chimera from chimera.replyobj import status from chimera.tkgui import app from chimera import update status('Fetching BLAST results completed, now filtering results and loading structures...') print('Fetching BLAST results completed, now filtering results and loading structures...') app.update() try: structures = processBlast(results, uid, path, minscore, includeNative, suppressdoubles, percentId, winnow, limit, keepPDB, skipPDB, excludeSelected, deleteHidden, hideSubmodels, noConfirm) except chimera.CancelOperation: status('MDA aborted by user.') print('MDA aborted by user.') return status('Now updating model information...') print('Now updating model information...') update.checkForChanges() # this line is needed due to a Chimera bug that will otherwise let MDA crash since models are destroyed before changes to Python objects are updated app.update() status('Sorting items in model panel...') print('Sorting items in model panel...') destroyGroups() groupSubModels() status('Now deleting/hiding alternate chains and/or extra submodels in multimodels...') print('Now deleting/hiding alternate chains and/or extra submodels in multimodels...') app.update() chimera.runCommand("modeldisp; ribbon") reduceModels(structures, hideSubmodels, hideComplex, hideAltChain, deleteHidden) status('Now arranging structures...') print('Now arranging structures...') app.update() boxes = createBoxes(structures) hideSimilar(boxes, uid, group) closeSpheres() gaps = arrangeModelsX(boxes, structures, uid, totseqlength) chimera.runCommand("focus; ~set independent; savepos overlay") # use "reset overlay" to retrieve position where structures are not stacked in Y, before calling Modeller arrangeModelsY(boxes, gaps, hideComplex) status('Arranging completed, now applying coloring...') print('Arranging completed, now applying coloring...') app.update() setAttributes(structures) colorModels(structures, coloring) if showAlignment: status('Coloring completed, loading sequence alignment...') print('Coloring completed, loading sequence alignment...') app.update() mav, templateModels = openMAV(path, uid, structures) status('Now opening Modeller interface...') print('Now opening Modeller interface...') app.update() openMod(path, uid, mav, templateModels) chimera.runCommand("setattr p display 0; focus; set independent; savepos stacked; ~show") #~ions") # use "reset stacked" to retrieve default position tend = time.time() # for performance optimization status('MDA completed after %d seconds.' % int(tend - tstart)) print('MDA completed after %d seconds.' % int(tend - tstart)) def mda(uniprotId, path = '~/Desktop/', minScore = 50, includeNative = False, suppressDoubles = False, percentId = 0, forceBlast = False, group = False, hideSubmodels = True, hideComplex = False, hideAltChain = True, deleteHidden = True, coloring= 'mutations', winnow = '0', limit = '0', keepPDB = '', skipPDB = '', excludeSelected = False, showAlignment = True, noConfirm = False): """ Initial mda function, starts BLAST or reads stored BLAST database, then calls main(). """ from os.path import expanduser, join, exists, isfile, splitext, split from os import makedirs from OpenSave import osOpen from chimera.tkgui import app # open reply log (crashes on windows if many models are opened with showAlignment false and reply log hidden) OpenReplyLog() # Convert ~/ to user's home directory for path where output file is stored path = expanduser(path) # get path to where Chimera files are stored and create MDA folder from chimera.fetch import default_fetch_directory dbpath = join(default_fetch_directory(), 'MDA') dbpath = expanduser(dbpath) if not exists(dbpath): makedirs(dbpath) if not exists(path): makedirs(path) import chimera import shelve filenameblast = join(dbpath, 'MDA_blast') filenameseq = join(dbpath, 'MDA_seqs') print '\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -' print 'Starting MDA.' # destroy any open MAV instances (which also kills the corresponding Modeller interface) mavs = findMAV() for mavinst in mavs: print "Closing MAV instance." mavinst.destroy() # check whether the uniprot ID is an existing file fasta = expanduser(uniprotId) if isfile(fasta): ID = split(splitext(fasta)[0])[1] print 'Reading "%s"' % fasta fastafile = osOpen(fasta, 'r') seq = '' seqcount = 0 for line in fastafile: if seqcount > 1: print 'Skipping extra sequence(s) in file "%s."' % fasta break if line[0] != '>': seq += line.rstrip() else: seqcount += 1 print 'Query sequence read from fasta file "%s": %s' % (fasta, seq) forceBlast = True # ensure that no cached uniprot blasts are retrieved... storeblast = False print 'Assembling protein with sequence specified in fasta file %s.' % fasta elif len(uniprotId) == 6 or len(uniprotId) == 10: # make sure the user didn't just mess up a path or filename... storeblast = True ID = uniprotId try: # look for stored sequence data seqshelf = shelve.open(filenameseq, 'c') seq = seqshelf[ID] seqshelf.close() print 'Reading stored sequence data...' print 'Assembling protein with UniProt ID: "%s."' % ID except KeyError: # get sequence and protein name from uniprot database print 'Fetching sequence data...' from SeqAnnotations import uniprotFetch, mapUniprotNameID, InvalidAccessionError try: Test = mapUniprotNameID(ID) except InvalidAccessionError: print 'Unknown uniprot ID! MDA aborted.' return Protein = uniprotFetch(ID) seq = str(Protein[0]) print 'Storing sequence data...' seqshelf[ID] = seq seqshelf.close() else: print '"%s" seems to be neither an existing fasta file nor a uniprot id. MDA aborted.' % uniprotId return totseqlength = len(seq) if totseqlength >= 5000: print 'WARNING: Target protein has a large sequence length of %d aa. MDA might be slow; setting forceBlast = True.' % totseqlength app.update() forceBlast = True storeblast = False # callback after BLAST has produced results... def cb(results, storeblast=storeblast, uid = ID, filen = filenameblast, totseql = totseqlength, winnow = winnow, limit = limit): # split cb into cb and main because an additional argument needs to be passed to cb! # save results to file if storeblast: blastshelf = shelve.open(filen, 'c') blastshelf[uid+winnow] = results blastshelf.close() # process the results: main(results, uid, path, totseql, minScore, includeNative, suppressDoubles, percentId, group, hideSubmodels, hideComplex, hideAltChain, deleteHidden, coloring, winnow, limit, keepPDB, skipPDB, excludeSelected, showAlignment, noConfirm) # blast function def startBlast(querySeq = seq, winnow = winnow): from blastpdb.ParserBlastP import Blastp29Service program = None db = "pdbaanew" queryName = "MDAquery" evalue = "1000" matrix = "BLOSUM62" passes = None params = (program, db, queryName, querySeq, evalue, matrix, passes, winnow) print 'Running BLAST 2.2.29 using %s matrix...' % matrix def failCB(): print "failed" def cancelCB(): print "cancelled" bp29 = Blastp29Service(cb, params=params, failCB=failCB, cancelCB=cancelCB) if not forceBlast: blastshelf = shelve.open(filenameblast, 'c') try: results = blastshelf[ID+winnow] blastshelf.close() print 'Reading stored BLAST data...' except KeyError: blastshelf.close() startBlast() else: main(results, ID, path, totseqlength, minScore, includeNative, suppressDoubles, percentId, group, hideSubmodels, hideComplex, hideAltChain, deleteHidden, coloring, winnow, limit, keepPDB, skipPDB, excludeSelected, showAlignment, noConfirm) else: startBlast() def mdacommand(cmdname, args): """ Helps registering the command in chimera. """ from Commands import parse_arguments from Commands import string_arg, float_arg, bool_arg, int_arg req_args = ( ('uniprotId', string_arg), ) opt_args = ( ('path', string_arg), ) kw_args = ( ('minScore', float_arg), ('includeNative', bool_arg), ('forceBlast', bool_arg), ('suppressDoubles', bool_arg), ('percentId', float_arg), ('group', bool_arg), ('hideSubmodels', bool_arg), ('hideComplex', bool_arg), ('hideAltChain', bool_arg), ('deleteHidden', bool_arg), ('coloring', string_arg), ('winnow', string_arg), ('limit', string_arg), ('keepPDB', string_arg), ('skipPDB', string_arg), ('excludeSelected', bool_arg), ('showAlignment', bool_arg), ('noConfirm', bool_arg) ) kw = parse_arguments(cmdname, args, req_args, opt_args, kw_args) mda(**kw) # add mda command to chimera from Midas.midas_text import addCommand addCommand('mda', mdacommand, help = True)