# --- UCSF Chimera Copyright --- # Copyright (c) 2000 Regents of the University of California. # All rights reserved. This software provided pursuant to a # license agreement containing restrictions on its disclosure, # duplication and use. This notice must be embedded in or # attached to all copies, including partial copies, of the # software or any revisions or derivations thereof. # --- UCSF Chimera Copyright --- # # $Id: __init__.py 41156 2016-06-30 23:19:07Z pett $ """Find chemical groups in a structure""" from types import FunctionType import chimera from chimera.idatm import typeInfo, tetrahedral, planar, linear, single from miscFind import findAromatics, findAliAmines, findAroAmines # R is a shorthand for alkyl group # X is a shorthand for 'any halide' # None matches anything (and nothing) N = chimera.Element.N C = chimera.Element.C O = chimera.Element.O H = chimera.Element.H R = (C, H) X = ('F', 'Cl', 'Br', 'I') SingleBond = (H, {'geometry':tetrahedral}, {'geometry':single}) Heavy = {'notType': ['H', 'HC', 'D', 'DC']} NonOxygenSingleBond = (H, {'geometry':tetrahedral, 'notType': ['O', 'O3', 'O2', 'O3-', 'O2-', 'Oar', 'Oar+']}, {'geometry':single}) # 'R'/None and H must be found only in a list of descendents (not as the key # element of a group or subgroup) and they must be arranged to be trailing # components of a group description # # H can match nothing (and therefore R can match nothing) # # Note that 'R' in the textual group description is interpreted to mean: # "C or H, unless H would produce a different functional group (then just C)" # #--- # # For algorithmic group descriptions that aren't themselves functions, # the following applies: # # Atoms are indicated as either strings [idatm types] or numbers/ # symbolic constants [element number]. Tuples indicate the atom # can be any one of the alternatives in the tuple. # # A list where an atom is expected is always two elements: an atom, # and a list of what is bonded to that atom. # # Dictionaries indicate facets of the idatm type (as per the # chimera.idatm.typeInfo dictionaries). The keys of the dictionary # correspond to typeInfo dictionaries' keys, and the values are what # attribute value the atom must have to match. The dictionary can # have a special key named 'default'. The value of True for 'default' # means that atom types not in the typeInfo dictionary will match, # False means they won't. The default for 'default' [urg] is 0. The # dictionary can also have a special 'notType' key, whose value is a # list of idatm types that aren't allowed to match. # # Lastly, an "atom" can be an instance of RingAtom, which is initialized # with two arguments, the first of which is any of the preceding # forms of atom specification, and the second is the number of # intraresidue rings that the atom participates in # #--- # # The last component of the group description indicates which atoms are # considered "principal" and are returned as part of the atom group. This # typically prunes the 'R's from a group. A '1' indicates the atom is # principal, a '0' that it is not. Due to the algorithm for pruning, # 'optional' atoms (R and H) must appear at the end of the group description, # with all R's before any H's. # # This last component is also used to indicate ring closures. A value that # is not '0' or '1' is assumed to the first of several such values. Each # atom with that value must be identical or the group will be pruned. All # atoms but the first with that value will be pruned from the returned group. # class RingAtom: def __init__(self, atomDesc, numRings): self.atomDesc = atomDesc self.numRings = numRings groupInfo = { "acyl halide": ("R(C=O)X", ['C2', [X, 'O2', SingleBond]], [1,1,1,0]), "adenine": ("6-aminopurine", ['Npl', [['Car', ['Car', ['N2', [['Car', [['N2', [['Car', ['Car', ['Npl', ['C3', ['Car', [['N2', ['Car']], H]]]]]]]], H]]]]]], H, H]], [1,1,2,1,1,1,1,2,1,0,1,1,2,1,1,1,1]), "aldehyde": ("R(C=O)H", ['C2', ['O2', SingleBond, H]], [1,1,0,1]), "amide": ("R(C=O)NR2", ['C2', ['O2', 'Npl', None]], [1,1,1,0]), "amine": ("RxNHy", lambda m: findAliAmines(m) + findAroAmines(m, 0), None), "aliphatic amine": ("RxNHy", findAliAmines, None), "aliphatic primary amine": ("RNH2", [('N3','N3+'), ['C3', H, H, H]], [1,0,1,1,1]), "aliphatic secondary amine": ("R2NH", [('N3','N3+'), ['C3', 'C3', H, H]], [1,0,0,1,1]), "aliphatic tertiary amine": ("R3N", [('N3','N3+'), ['C3', 'C3', 'C3', H]], [1,0,0,0,1]), "aliphatic quaternary amine": ("R4N+", ['N3+', ['C3', 'C3', 'C3', 'C3']], [1,0,0,0,0]), "aromatic amine": ("RxNHy", lambda m: findAroAmines(m, 0), None), "aromatic primary amine": ("RNH2", lambda m: findAroAmines(m, 1), None), "aromatic secondary amine": ("R2NH", lambda m: findAroAmines(m, 2), None), "aromatic tertiary amine": ("R3N", lambda m: findAroAmines(m, 3), None), "aromatic ring":("aromatic", findAromatics, None), "carbonyl": ("R2C=O", ['O2', [C]], [1,1]), "carboxylate": ("RCOO-", ['Cac', [['O2-', []], ['O2-', []], SingleBond]], [1,1,1,0]), "cytosine": ("2-oxy-4-aminopyrimidine", ['Npl', [['C2', ['N2', ['C2', [['C2', [['Npl', ['C3', ['C2', ['N2', 'O2']]]], H]], H]]]]], H, H], [1,1,2,1,1,1,0,1,2,1,1,1,1,1]), "disulfide": ("RSSR", ['S3', [['S3', [SingleBond]], SingleBond]], [1,1,0,0]), "ester": ("R(C=O)OR", ['C2', ['O2', [O, [C]], SingleBond]], [1,1,1,0,0]), "ether O": ("ROR", ['O3', [C, C]], [1,0,0]), "guanine": ("2-amino-6-oxypurine", ['Npl', [['C2', [['N2', ['Car']], ['Npl', [['C2', ['O2', ['Car', ['Car', ['N2', [['Car', [['Npl', ['C3', 'Car']], H]]]]]]]], H]]]], H, H]], [1,1,1,2,1,1,1,1,2,1,1,1,0,2,1,1,1,1]), "halide": ("RX", [X, [Heavy]], [1,0]), "hydroxyl": ("COH or NOH", ['O3', [(C, N), H]], [1,0,1]), "imine": ("R2C=NR", ['C2', [['N2', [SingleBond]], SingleBond, SingleBond]], [1,1,0,0,0]), "ketone": ("R2C=O", ['C2', ['O2', C, C]], [1,1,0,0]), "methyl": ("RCH3", ['C3', [Heavy, H, H, H]], [1,0,1,1,1]), "nitrile": ("RC*N", ['C1', ['N1', SingleBond]], [1,1,0]), "nitro": ("RNO2", ['Ntr', ['O2-', 'O2-', NonOxygenSingleBond]], [1,1,1,0]), "phosphate": ("PO4", ['Pac', [O, O, O, O]], [1,1,1,1,1]), "phosphinyl": ("R2PO2-", ['Pac', [['O3-', []], ['O3-', []], R,R]], [1,1,1,0,0]), "phosphonate": ("RPO3-", ['Pac', ['O3-', 'O3-', 'O3-', C]], [1,1,1,1,0]), "purine": ("purine", ['Car', ['Car', RingAtom(N,1), [RingAtom(N,1), [[RingAtom('Car',1), [[RingAtom(N,1), [[RingAtom('Car',2), [RingAtom('Car',2), [RingAtom('Car',1), [[RingAtom(N,1), [[RingAtom('Car',1), [RingAtom(N,1), None]], None]], None]]]], None]], None]], None]]]], [3,2,4,1,1,1,2,3,1,1,1,4,0,0,0,0,0,0]), "pyrimidine": ("pyrimidine", [RingAtom(N,1), [RingAtom('Car',1), [RingAtom('Car',1), [[RingAtom(N,1), [[RingAtom('Car',1), [[RingAtom('Car',1), [RingAtom('Car',1), None]], None]], None]], None]], None]], [1,2,1,1,1,1,2,0,0,0,0,0]), "sulfate": ("SO4", ['Sac', [O, O, O, O]], [1,1,1,1,1]), "sulfonamide": ("RSO2NR2", ['Son', ['O2', 'O2', 'Npl', NonOxygenSingleBond]], [1,1,1,0,0]), "sulfonate": ("RSO3-", ['Sac', ['O3-', 'O3-', 'O3-', C]], [1,1,1,1,0]), "sulfone": ("R2SO2", ['Son', ['O2', 'O2', C, C]], [1,1,1,0,0]), "sulfonyl": ("R2SO2", ['Son', ['O2', 'O2', NonOxygenSingleBond, NonOxygenSingleBond]], [1,1,1,0,0]), "thiocarbonyl": ("C=S", ['S2', [C]], [1,1]), "thioether": ("RSR", ['S3', [C, C]], [1,0,0]), "thiol": ("RSH", ['S3', [C, H]], [1,0,1]), "thymine": ("5-methyl-2,4-dioxypyrimidine", ['C3', [['C2', ['C2', ['C2', ['O2', ['Npl', [['C2', ['O2', ['Npl', ['C3', ['C2', ['C2', H]]]]]], H]]]]]], H, H, H]], [1,2,3,1,1,1,1,1,1,0,3,2,1,1,1,1,1]), "uracil": ("2,4-dioxypyrimidine", ['O2', [['C2', ['Npl', ['Npl', [['C2', ['O2', ['C2', [['C2', [['Npl', ['C2', 'C3']], H]], H]]]], H]]]]]], [1,2,3,1,1,1,1,1,3,2,0,1,1,1]) } def findGroup(groupDesc, molecules): if isinstance(groupDesc, basestring): try: groupFormula, groupRep, groupPrincipals = groupInfo[ groupDesc] except: raise KeyError("No known chemical group named '%s'" % groupDesc) else: groupRep, groupPrincipals = groupDesc if isinstance(groupRep, FunctionType): return apply(groupRep, (molecules,)) groups = [] for molecule in molecules: atomList = molecule.atoms for atom in atomList: foundGroups = _traceGroup(atom, None, groupRep) if foundGroups == None: continue if not groupPrincipals: groups = groups + foundGroups continue for group in foundGroups: principalGroup = [] ringsOkay = True ringClosures = {} for i in range(len(group)): principal = groupPrincipals[i] if principal: if ringClosures.has_key( principal): if group[i] != \ ringClosures[principal]: ringsOkay = False break # don't add second # instance of this atom continue if principal != 1: ringClosures[principal]\ = group[i] principalGroup.append(group[i]) if ringsOkay: groups.append(principalGroup) return groups def _traceGroup(atom, parent, fragRep): # see if 'atom' satisfies the requirements of 'fragRep' without using # the 'parent' atom if isinstance(fragRep, list): atomTarget = fragRep[0] descendentsTarget = fragRep[1] else: atomTarget = fragRep descendentsTarget = None if not isinstance(atomTarget, list): atomMatches = _simpleMatch(atom, atomTarget) else: for target in atomTarget: atomMatches = _simpleMatch(atom, target) if atomMatches: break if not atomMatches: return None if descendentsTarget == None: return [[atom]] # 'descendentsTarget' should be a sequence that matches the # atoms connected to 'atom' (possibly skipping 'parent', if # specified) # since 'R' and None may match 0 or 1 atoms, can't check for exact # match between number of bonds and number of descendents bondsToMatch = len(atom.primaryBonds()) - (parent != None) possibleHs = descendentsTarget.count(R) + \ descendentsTarget.count(None) + descendentsTarget.count(H) if len(descendentsTarget) < bondsToMatch \ or len(descendentsTarget) - possibleHs > bondsToMatch: return None if len(descendentsTarget) == 0 and bondsToMatch == 0: return [[atom]] matches = _matchDescendents(atom, atom.primaryNeighbors(), parent, descendentsTarget[:], {}) if matches == None: return None for i in range(len(matches)): matches[i] = [atom] + matches[i] return matches def _simpleMatch(atom, target): if isinstance(target, basestring): if target == atom.idatmType: return True elif isinstance(target, int): if target == atom.element.number: return True elif isinstance(target, dict): idatmType = atom.idatmType if not typeInfo.has_key(idatmType): # uncommon type if target.has_key('default'): return target['default'] return False if 'notType' in target: if idatmType in target['notType']: return False info = typeInfo[idatmType] for key, value in target.items(): if key == 'default' or key == 'notType': continue if getattr(info, key) != value: return False return True elif target is None: return True elif isinstance(target, RingAtom): return _simpleMatch(atom, target.atomDesc) \ and len(atom.minimumRings()) == target.numRings else: # tuple for subtarget in target: if _simpleMatch(atom, subtarget): return True return False def _matchDescendents(atom, neighbors, parent, descendents, prevAssigned): # prevAssigned is a dictionary that notes what assignments of atoms # types to fragment descriptions have previously occurred and is used # to try to avoid multiply matching indistinguishable fragments with # the same set of atoms matches = [] target = descendents[0] alternatives = descendents[1:] possibleHs = descendents.count(R) + descendents.count(H) \ + descendents.count(None) prevAssigned = prevAssigned.copy() bondsToMatch = len(neighbors) - (parent != None) if len(descendents) < bondsToMatch \ or len(descendents) - possibleHs > bondsToMatch: return None for otherAtom in neighbors: if otherAtom == parent: continue if prevAssigned.has_key(otherAtom): skipAtom = False for fragment in prevAssigned[otherAtom]: if _fragCompare(fragment, target): skipAtom = True break if skipAtom: continue possibleMatches = _traceGroup(otherAtom, atom, target) if possibleMatches != None: if len(alternatives) == 0: remainderMatches = [[]] else: remainNeighbors = neighbors[:] remainNeighbors.remove(otherAtom) remainderMatches = _matchDescendents(atom, remainNeighbors, parent, alternatives, prevAssigned) if remainderMatches != None: for match1 in possibleMatches: for match2 in remainderMatches: matches.append(match1 + match2) if prevAssigned.has_key(otherAtom): # don't use append here! prevAssigned[otherAtom] = prevAssigned[otherAtom] + [target] else: prevAssigned[otherAtom] = [target] possibleHs = alternatives.count(R) + alternatives.count(H) \ + alternatives.count(None) if (target == R or target == H or target == None) \ and len(alternatives) >= bondsToMatch \ and len(alternatives) - possibleHs <= bondsToMatch: # since 'R'/None may be hydrogen, and hydrogen is missing # from the structure, check if the group matches while # omitting the 'R'/None (or H) if len(alternatives) == 0 and bondsToMatch == 0: matches.append([]) else: remainderMatches = _matchDescendents(atom, neighbors[:], parent, alternatives, prevAssigned) if remainderMatches != None: for match in remainderMatches: matches.append(match) if len(matches) > 0: return matches return None def _fragCompare(frag1, frag2): if type(frag1) != type(frag2): if isinstance(frag1, tuple) and frag2 in frag1: return True if isinstance(frag2, tuple) and frag1 in frag2: return True return False if isinstance(frag1, (tuple, list)): if len(frag1) != len(frag2): return False for i in range(len(frag1)): if isinstance(frag1[i], (tuple, list)): if not _fragCompare(frag1[i], frag2[i]): return False else: if frag1[i] != frag2[i]: return False return True return frag1 == frag2 # register selectors with selection manager... selCategory = 'functional group' nucSubcategory = 'nucleoside base' nucBases = ('adenine', 'cytosine', 'guanine', 'thymine', 'uracil') amineSubcategory = 'amine' try: from chimera.selection.manager import selMgr, SortString except ImportError: pass else: nucSels = {} amineSels = {} for group in groupInfo.keys(): selectorText = """\ import %s from chimera.misc import bonds all = [] for group in %s.findGroup('%s',molecules): # use 'bonds' instead of sel.addImplied # to avoid adding bonds between adjacent groups all.extend(group) all.extend(bonds(group)) sel.add(all) """ % (__name__, __name__, group) if group in nucBases: nucSels[SortString(group)] = selectorText elif group[-5:] == "amine": if group == "amine": key = "all" else: key = group[:-6] amineSels[key] = (selectorText, groupInfo[group][0]) else: selMgr.addSelector(__name__, [selMgr.CHEMISTRY, selCategory, group], selectorText, description=groupInfo[group][0]) selAll = None for ns in nucSels.values(): if selAll is None: selAll = ns else: selAll = selMgr.doOp('EXTEND', selAll, ns) nucSels[SortString('all of the above', 1)] = selAll for group in nucSels.keys(): if groupInfo.has_key(group): description = groupInfo[group][0] else: description = None selMgr.addSelector(__name__, [selMgr.CHEMISTRY, selCategory, nucSubcategory, group], nucSels[group], description=description) for amine in amineSels.keys(): sortVal = 0 if "aliphatic" in amine: sortVal += 1 if "aromatic" in amine: sortVal += 6 for add, substring in enumerate(["primary", "secondary", "tertiary", "quaternary"]): if substring in amine: sortVal += add + 1 selMgr.addSelector(__name__, [selMgr.CHEMISTRY, selCategory, amineSubcategory, SortString(amine, sortVal)], amineSels[amine][0], description=amineSels[amine][1]) selMgr.makeCallbacks() del group, nucSels, selAll, ns, description, selMgr, selectorText del amineSels, amine, SortString