#!/usr/local/bin/python
# Copyright (c) 2000 by the Regents of the University of California.
# All rights reserved.  See http://www.cgl.ucsf.edu/chimera/ for
# license details.
#
# $Id: oslParser.py 41022 2016-05-16 18:43:33Z pett $

import types
import re
import string
import StringIO
import operator
from oslLexer import *


class OSLSyntaxError(SyntaxError):
    """Raised by applyFunctions when the generated selection code fails."""
    pass

# replace expand() with:
#
#   from chimera import selection
#   sel = selection.OSLSelection(str)
#
# and to use it to get atoms (vertices):
#
#   sel.itemize(vertex=1) => { chimera.SelVertex: [...] }
#   sel.apply(vFunc=func) => None (just explicit atoms in selection)
#   sel.expandApply(chimera.SelVertex, vFunc) => None (all implied atoms)

# Obsolete:
#def expand(s, graphs):
#   """
#   Expand an object specification string and a graph list into an
#   object selection.  See "applyFunctions" for acceptable graph list
#   constructs
#   """
#
#   import oslSelection
#   oslSel = oslSelection.oslSelection()
#   applyFunctions(s, graphs, oslSel.addGraph,
#           oslSel.addSubgraph, oslSel.addVertex)
#   return oslSel


def applyFunctions(s, graphs, gFunc, sFunc, vFunc):
    """
    Based on an object specification string, apply functions to
    selected graphs, subgraphs and vertices in the given graph list.
    The graph list can either be a simple list, or a dictionary whose
    values are lists of graphs.  Any of the supplied functions may be
    None.

    The string is always parsed (so a malformed specification raises
    from Parser even when there is nothing to examine); the generated
    code is only executed when there is at least one graph and at
    least one callback.

    Raises OSLSyntaxError if executing the generated code fails for
    any reason; the generated code is reported through
    replyobj.message first.
    """
    # The parser is handed the *names* of the callbacks; the generated
    # code looks those names up in this function's local namespace.
    gs = 'gFunc' if gFunc else None
    ss = 'sFunc' if sFunc else None
    vs = 'vFunc' if vFunc else None
    p = Parser(s, gs, ss, vs)
    if not graphs or (not gFunc and not sFunc and not vFunc):
        return  # no graphs to examine

    # NOTE: 'oslGraphList' and 'matchIndex' (like gFunc/sFunc/vFunc)
    # are referenced by name from the generated code -- do not rename.
    if isinstance(graphs, dict):
        oslGraphList = reduce(operator.add, graphs.values(), [])
    else:
        oslGraphList = graphs
    matchIndex = 0
    try:
        # NOTE(review): this executes code that Parser generated from
        # the specification string.  Safety presumably relies on the
        # parser/lexer only emitting repr()-quoted values and
        # lexer-validated identifiers -- confirm before passing
        # untrusted input.
        # The tuple form is equivalent to "exec code in globals, locals".
        exec(p.code, globals(), locals())
    except Exception as err:
        # Exception (rather than a bare except) so that
        # KeyboardInterrupt/SystemExit are not disguised as OSL errors.
        from replyobj import message
        message("Generated OSL code string was:\n%s\n" % p.code)
        raise OSLSyntaxError("Error in '%s': %s" % (s, err))

KeyEnd = 'end'

# Parser class variables:
#   level       the relationships among graphs, subgraphs and vertices
#               objects with smaller numbers contain objects with bigger
#               these are constants, since relationships are fixed
#   symbol      symbol of the iterator for each type of object
# Parser instance variables:
#   input       string to parse
#   error       error encountered parsing input
#   code        code generated from parsing input
#   __indent    the indentation level for each object type
#   __end       index of end of input
#   __start     where to start looking for the next token
#   __f         temporary output stream for generated loop code
#   __p         temporary output stream for generated setup code
#   __cond      temporary output stream for generated conditions
#   __token     current token
#   __key       current key level (graph, subgraph, etc.)
class Parser:
    """
    Parse an object specification string and generate Python source
    code (stored in the 'code' attribute) that loops over graphs,
    subgraphs and vertices, applies the requested tests and calls the
    named callback functions on whatever matches.
    """

    level = { None:-1, KeyGraph:0, KeySubgraph:1, KeyVertex:2, KeyEnd:3 }
    symbol = { None:'?', KeyGraph:'g', KeySubgraph:'s', KeyVertex:'v' }

    #
    # __init__
    #   initialize a Parser instance by parsing the given
    #   string and generating Python code corresponding to
    #   the object selection language specification
    #   gFunc, sFunc and vFunc are the *names* (strings, or None)
    #   of the callbacks that the generated code will call
    #   Raises SyntaxError if input remains after the last selector
    #
    def __init__(self, s, gFunc, sFunc, vFunc):
        self.input = s
        self.code = None
        self.error = None
        self.gFunc = gFunc
        self.sFunc = sFunc
        self.vFunc = vFunc
        self.__indent = { None:-1, KeyGraph:0, KeySubgraph:1,
                    KeyVertex:2, KeyEnd:3 }
        self.__start = 0
        self.__end = len(self.input)
        self.__p = StringIO.StringIO()
        self.__f = StringIO.StringIO()
        self.__p.write('import chimera\n')
        self.__p.write('from chimera.oslParser import oslTestAttr\n')
        self.__key = None
        self.__nextToken(ModeAbbr)
        self.__specIndex = 0
        self.__abbrIndex = 0
        while self.__token['type'] is TypeKey:
            self.__parseKey()
        if self.__token['type'] is not TypeEnd:
            raise SyntaxError('extraneous token',
                    ('', 1, self.__token['start'], self.input))
        # popping back up to "no key" emits the final callbacks
        self.__newKey(None)
        # setup code (abbreviation objects) first, then the loops
        self.code = self.__p.getvalue() + self.__f.getvalue()
        del self.__p
        del self.__f

    #
    # __nextToken
    #   Get the next token from input string
    #
    def __nextToken(self, mode):
        self.__token, self.__start = nextToken(self.input,
                    self.__start, self.__end, mode)

    #
    # __beginCond
    #   Prepare to create the conditional part of a selection test
    #   We create a StringIO object to store any generated code
    #
    def __beginCond(self):
        self.__cond = StringIO.StringIO()

    #
    # __addCond
    #   Add code for condition of current test
    #
    def __addCond(self, s):
        self.__cond.write(s)

    #
    # __checkPresence
    #   Add code to test for the truth value of an attribute
    #   (negated when 'present' is false)
    #
    def __checkPresence(self, key, attr, present=1):
        if not present:
            self.__cond.write('not ')
        self.__cond.write('(hasattr(%s, %s) and %s.%s)'
                % (Parser.symbol[key], repr(attr),
                Parser.symbol[key], str(attr)))

    #
    # __checkDefined
    #   Add code to test if an attribute is defined
    #   (i.e., exists and is not None; negated when 'present'
    #   is false)
    #
    def __checkDefined(self, key, attr, present=1):
        if not present:
            self.__cond.write('not ')
        self.__cond.write('(hasattr(%s, %s) and %s.%s is not None)'
                % (Parser.symbol[key], repr(attr),
                Parser.symbol[key], str(attr)))

    #
    # __checkValue
    #   Add code to test an attribute against a constant
    #
    def __checkValue(self, key, attr, op, s):
        self.__cond.write('oslTestAttr(%s, %s, %s, %s)'
                % (Parser.symbol[key], repr(attr),
                repr(op), repr(s)))

    #
    # __endCond
    #   If code was generated for a condition, we insert a test
    #   into the output code
    #
    def __endCond(self):
        cond = self.__cond.getvalue()
        if cond != '':
            self.__insertTest(cond)
        del self.__cond

    #
    # __insertTest
    #   Insert a test into the output code
    #   Since we are inserting an if statement, we adjust the
    #   indentation level of all objects that we contain to match
    #   the indentation level of this test
    #   'commaInfo', if given, is a (symbol, list-variable-name)
    #   pair; code is then emitted that records in
    #   <symbol>commaIndex which entry of the list matched
    #
    def __insertTest(self, cond, commaInfo=None):
        indent = self.__indent[self.__key] + 1
        self.__f.write('%sif %s:\n' % ('\t' * indent, cond))
        cIndent = '\t' * (indent+1)
        if commaInfo:
            pkey, commaAbbr = commaInfo
            self.__f.write('%sfor %scommaIndex, abbrTest in '
                    'enumerate(%s):\n'
                    % (cIndent, pkey, commaAbbr))
            self.__f.write('%s\tif %s.oslTestAbbr(abbrTest):\n'
                    % (cIndent, pkey))
            self.__f.write('%s\t\tbreak\n' % cIndent)
        if self.__key is KeyGraph:
            self.__indent[KeyGraph] = indent
            self.__indent[KeySubgraph] = indent + 1
            self.__indent[KeyVertex] = indent + 2
            self.__indent[KeyEnd] = indent + 3
        elif self.__key is KeySubgraph:
            self.__indent[KeySubgraph] = indent
            self.__indent[KeyVertex] = indent + 1
            self.__indent[KeyEnd] = indent + 2
        elif self.__key is KeyVertex:
            self.__indent[KeyVertex] = indent
            self.__indent[KeyEnd] = indent + 1

    #
    # __insertAbbrev
    #   Insert a test for an abbreviation into the output code
    #   Note that we call __insertTest to do all the hard work
    #
    def __insertAbbrev(self, key, depth, abbr):
        n = self.__abbrIndex
        # Since we no longer have whitespace as abbr-ending
        # (to allow spaces after commas) we need to rstrip
        abbr = abbr.rstrip()
        if ',' in abbr:
            # drop any whitespace that directly follows a comma
            followsComma = False
            condensed = []
            for c in abbr:
                if c == ',':
                    followsComma = True
                    condensed.append(c)
                elif c in string.whitespace:
                    if not followsComma:
                        condensed.append(c)
                else:
                    followsComma = False
                    condensed.append(c)
            abbr = u"".join(condensed)
        self.__p.write('abbr%d = chimera.OSLAbbreviation(%d, %s)\n'
                % (n, depth, repr(abbr)))
        kw = {}
        if ',' in abbr:
            # also build one abbreviation per comma-separated piece
            # so the generated code can tell which piece matched
            self.__p.write('commaAbbr%d = [' % n)
            for commaAbbr in abbr.split(","):
                self.__p.write('chimera.OSLAbbreviation(%d,'
                        ' %s), ' % (depth, repr(commaAbbr)))
            self.__p.write(']\n')
            kw['commaInfo'] = (Parser.symbol[key], 'commaAbbr%d'%n)
        self.__insertTest('%s.oslTestAbbr(abbr%d)'
                % (Parser.symbol[key], n), **kw)
        self.__abbrIndex = n + 1

    #
    # __tab
    #   Indent to the proper level
    #   NOTE: the 'key' argument is not consulted; the indentation
    #   of the *current* key is used (every caller in this class
    #   passes the current key anyway)
    #
    def __tab(self, key):
        self.__f.write('\t' * self.__indent[self.__key])

    #
    # __emit
    #   Write one piece of loop code, indented for the current key
    #
    def __emit(self, text):
        self.__tab(self.__key)
        self.__f.write(text)

    #
    # __beginChildLoop
    #   Emit the start of the loop over the children of 'parentKey'
    #   objects; the current key must already have been set to 'key'
    #
    def __beginChildLoop(self, key, parentKey):
        sym = Parser.symbol[key]
        self.__emit('for %sIndex, %s in '
                'enumerate(%s.oslChildren()):\n'
                % (sym, sym, Parser.symbol[parentKey]))
        self.__emit('\t%scommaIndex = -1\n' % sym)

    #
    # __newKey
    #   Switch to a new object type (key)
    #   This is the function that inserts all the code between tests
    #   The algorithm is divided into three phases:
    #     1. If the new key level is higher (more specific) than
    #        the current key level, we descend to that level
    #     2. If the new key level is equal or lower (same
    #        or less specific) than the current key level, we
    #        invoke the callback and pop up to the new level
    #     3. Readjust the indentation levels to the new object type
    #   Phase 1 allows us to insert the start of the loop constructs
    #   Phase 2 allows us to terminate the loop and select objects
    #   if appropriate
    #   Phase 3 allows us to move on to the next test in a consistent
    #   state
    #
    def __newKey(self, key):
        if self.level[key] > self.level[self.__key]:
            while self.__key is not key:
                if self.__key is None:
                    self.__key = KeyGraph
                    # outermost loop: never indented
                    self.__f.write('for gIndex, %s in '
                            'enumerate(oslGraphList):\n'
                            % Parser.symbol[KeyGraph])
                    self.__f.write('\t%scommaIndex = -1\n'
                            % Parser.symbol[KeyGraph])
                elif self.__key is KeyGraph:
                    self.__key = KeySubgraph
                    self.__beginChildLoop(KeySubgraph, KeyGraph)
                    if not self.sFunc and not self.vFunc:
                        # ugly, but it works
                        self.__emit('\tbreak'
                            ' # no subgraph or vertex functions\n')
                elif self.__key is KeySubgraph:
                    self.__key = KeyVertex
                    self.__beginChildLoop(KeyVertex, KeySubgraph)
        else:
            if self.__key is KeyGraph:
                if self.gFunc:
                    self.__emit('\t# Use graph\n')
                    self.__emit('\t%s(%s, %d, '
                            'gcommaIndex, matchIndex)\n'
                            % (self.gFunc,
                            Parser.symbol[KeyGraph],
                            self.__specIndex))
                    self.__emit('\tmatchIndex = '
                            'matchIndex + 1\n')
                if self.sFunc or self.vFunc:
                    # selecting a graph implies its contents
                    self.__key = KeySubgraph
                    self.__setIndent()
                    self.__beginChildLoop(KeySubgraph, KeyGraph)
            if self.__key is KeySubgraph:
                if self.sFunc:
                    self.__emit('\t# Use subgraph\n')
                    self.__emit('\t%s(%s, %d, '
                            '(gIndex if gcommaIndex < 0 else gcommaIndex, scommaIndex),'
                            ' matchIndex)\n'
                            % (self.sFunc,
                            Parser.symbol[KeySubgraph],
                            self.__specIndex))
                    self.__emit('\tmatchIndex = '
                            'matchIndex + 1\n')
                else:
                    self.__emit('\tpass\n')
                if self.vFunc:
                    # selecting a subgraph implies its vertices
                    self.__key = KeyVertex
                    self.__setIndent()
                    self.__beginChildLoop(KeyVertex, KeySubgraph)
            if self.__key is KeyVertex:
                if self.vFunc:
                    self.__emit('\t# Use vertex\n')
                    self.__emit('\t%s(%s, %d, '
                            '(gIndex if gcommaIndex < 0 else gcommaIndex, '
                            'sIndex if scommaIndex < 0 else scommaIndex, '
                            'vcommaIndex), matchIndex)\n'
                            % (self.vFunc,
                            Parser.symbol[KeyVertex],
                            self.__specIndex))
                    self.__emit('\tmatchIndex = '
                            'matchIndex + 1\n')
                else:
                    self.__emit('\tpass\n')
        self.__key = key
        self.__setIndent()
        self.__specIndex = self.__specIndex + 1

    #
    # __setIndent
    #   Set the indentation levels according to the key
    #   (the current key's level is one more than its container's,
    #   and everything below it follows consecutively)
    #
    def __setIndent(self):
        if self.__key is KeyGraph:
            indent = self.__indent[None]
            self.__indent[KeyGraph] = indent + 1
            self.__indent[KeySubgraph] = indent + 2
            self.__indent[KeyVertex] = indent + 3
            self.__indent[KeyEnd] = indent + 4
        elif self.__key is KeySubgraph:
            indent = self.__indent[KeyGraph]
            self.__indent[KeySubgraph] = indent + 1
            self.__indent[KeyVertex] = indent + 2
            self.__indent[KeyEnd] = indent + 3
        elif self.__key is KeyVertex:
            indent = self.__indent[KeySubgraph]
            self.__indent[KeyVertex] = indent + 1
            self.__indent[KeyEnd] = indent + 2
        elif self.__key is None:
            self.__indent[KeyGraph] = 0
            self.__indent[KeySubgraph] = 1
            self.__indent[KeyVertex] = 2
            self.__indent[KeyEnd] = 3

    #
    # __parseKey
    #   Input consists of a set of selectors that consist of
    #   a predefined key (# for graphs, : for subgraphs and
    #   @ for vertices), an optional abbreviation string and
    #   a qualifier (which starts with a /)
    #   We parse this input using a recursive descent parser that
    #   starts with the first key
    #
    def __parseKey(self):
        key = self.__token['key']
        if key is not KeyGraph \
        and key is not KeySubgraph \
        and key is not KeyVertex:
            raise ValueError('unknown key type', self.__token)
        self.__newKey(key)
        # a key repeated N times gives abbreviation depth N-1
        level = -1
        while self.__token['type'] is TypeKey \
        and self.__token['key'] is key:
            level = level + 1
            self.__nextToken(ModeAbbr)
        if self.__token['type'] is TypeAbbr:
            self.__insertAbbrev(self.__key, level,
                        self.__token['string'])
            self.__nextToken(ModeQual)
        if self.__token['type'] is TypeQual:
            self.__beginCond()
            self.__parseQualifier()
            self.__endCond()

    #
    # __parseQualifier
    #   Parse the qualifier expression
    #
    def __parseQualifier(self):
        if self.__token['type'] is not TypeQual:
            raise RuntimeError('unexpected token', self.__token)
        self.__nextToken(ModeQual)
        self.__parseOrList()

    #
    # __parseOrList
    #   The qualifier expression can have "or" and "and" conjunctions
    #   We define "and" as having higher precedence than "or", so
    #   we parse the "or" list higher
    #
    def __parseOrList(self):
        self.__parseAndList()
        while self.__token['type'] is TypeOp \
        and self.__token['operator'] is OpOr:
            self.__addCond(' or ')
            self.__nextToken(ModeQual)
            self.__parseAndList()

    #
    # __parseAndList
    #   Parse the "and" list of qualifier expressions
    #
    def __parseAndList(self):
        self.__parseTest()
        while self.__token['type'] is TypeOp \
        and self.__token['operator'] is OpAnd:
            self.__addCond(' and ')
            self.__nextToken(ModeQual)
            self.__parseTest()

    #
    # __parseTest
    #   Each "and" list consists of a series of actual conditions
    #   A condition may be simply the name of an attribute, which
    #   tests for the presence of the attribute (a preceding
    #   ! negates the test)
    #   It may also be a name and a constant, separated by an
    #   operator, which tests the attribute against a fixed value
    #   Finally, a test may be a parenthesized expression as well
    #   Raises SyntaxError on a malformed test
    #
    def __parseTest(self):
        if self.__token['type'] is TypeIdent:
            left = self.__token
            self.__nextToken(ModeQual)
            if self.__token['type'] is not TypeOp:
                # Create node
                self.__checkPresence(self.__key, left['string'])
            else:
                op = self.__token['operator']
                if op is OpMatch \
                or op is OpNotMatch \
                or op is OpEQ1 \
                or op is OpEQ2 \
                or op is OpNE \
                or op is OpLT \
                or op is OpLE \
                or op is OpGT \
                or op is OpGE:
                    self.__nextToken(ModeValue)
                    if self.__token['type'] is not TypeIdent:
                        raise SyntaxError('expected identifier',
                            ('', 1, self.__token['start'],
                            self.input))
                    # Create node
                    self.__checkValue(self.__key, left['string'],
                            op, self.__token['string'])
                    self.__nextToken(ModeQual)
                else:
                    # some other operator (e.g. a conjunction or
                    # close parenthesis) follows: plain presence
                    # test; the operator is left for the caller
                    # Create node
                    self.__checkPresence(self.__key,
                                left['string'])
        elif self.__token['type'] is TypeOp:
            if self.__token['operator'] is OpNot:
                self.__nextToken(ModeQual)
                if self.__token['type'] is not TypeIdent:
                    raise SyntaxError('expected identifier',
                        ('', 1, self.__token['start'],
                        self.input))
                # Create node
                self.__checkPresence(self.__key,
                            self.__token['string'], 0)
                self.__nextToken(ModeQual)
            elif self.__token['operator'] is OpUndef:
                self.__nextToken(ModeQual)
                if self.__token['type'] is not TypeIdent:
                    raise SyntaxError('expected identifier',
                        ('', 1, self.__token['start'],
                        self.input))
                # Create node
                self.__checkDefined(self.__key,
                            self.__token['string'], 0)
                self.__nextToken(ModeQual)
            elif self.__token['operator'] is OpLP:
                self.__addCond('(')
                self.__nextToken(ModeQual)
                self.__parseOrList()
                if self.__token['type'] is not TypeOp \
                or self.__token['operator'] is not OpRP:
                    raise SyntaxError('expected close parenthesis',
                        ('', 1, self.__token['start'],
                        self.input))
                self.__addCond(')')
                self.__nextToken(ModeQual)
            else:
                raise SyntaxError('unexpected operator',
                    ('', 1, self.__token['start'], self.input))
        else:
            raise SyntaxError('unexpected token',
                ('', 1, self.__token['start'], self.input))

#
# _typeTestDict is the dictionary holding attribute test functions for
#   a particular type of object (e.g., Integer, Instance, etc.)
# _classTestDict is the dictionary holding attribute test functions
#   for an instance of a particular Python class (note that
#   extension classes have distinct types from a Python class
#   instance and therefore must be registered in _typeTestDict)
# _classAttrTestDict maps a class to a dictionary of per-attribute
#   test functions (see registerTest)
#
_typeTestDict = {}
_classTestDict = {}
_classAttrTestDict = {}

def registerTest(subject, testFunc):
    """
    Register 'testFunc' as the attribute test function for 'subject'.

    'subject' may be a type, an old-style class, or a
    (class, attribute name) pair.  Type and class test functions are
    called as testFunc(attrValue, op, value); per-attribute test
    functions are called as testFunc(obj, op, value).

    Raises TypeError for any other kind of subject.
    """
    if isinstance(subject, type):
        _typeTestDict[subject] = testFunc
    elif isinstance(subject, types.ClassType):
        _classTestDict[subject] = testFunc
    elif isinstance(subject, (tuple, list)):
        # for class attributes that are types (rather than instances)
        # mostly used for enums that have symbolic names
        klass, attrName = subject
        try:
            catDict = _classAttrTestDict[klass]
        except KeyError:
            catDict = {}
            _classAttrTestDict[klass] = catDict
        catDict[attrName] = testFunc
    else:
        raise TypeError(
            "Cannot register test for non-attr non-class non-type")

#
# _untestableResult
#   Result of a test on an attribute that is missing or cannot be
#   compared: only the "not equal"/"does not match" operators hold
#
def _untestableResult(op):
    return 1 if op in (OpNE, OpNotMatch) else 0

#
# oslTestAttr
#   Compares an attribute of this instance with the given value
#   'attrName' is the name of the attribute
#   'op' is the string representation of the comparison operator
#   'value' is the constant (string) value to compare with
#
def oslTestAttr(obj, attrName, op, value):
    try:
        attr = getattr(obj, attrName)
    except AttributeError:
        return _untestableResult(op)
    if callable(attr):
        attr = attr()
    try:
        return _typeTestDict[type(attr)](attr, op, value)
    except ValueError:
        # perhaps an enum with a symbolic name...
        try:
            testFunc = _classAttrTestDict[obj.__class__][attrName]
        except KeyError:
            # (this used to be "... and 1 and 0", which is always
            # false and so made != and not-match fail here, unlike
            # the other two fallbacks in this function)
            return _untestableResult(op)
        return testFunc(obj, op, value)
    except KeyError:
        # no test function registered for this attribute's type
        return _untestableResult(op)

#
# Here are the test functions for standard types: string, number and instance
#
def _stringTest(attr, op, value):
    """
    Test a string attribute.  Matching treats 'value' as a regular
    expression that must match all of 'attr', ignoring case; OpEQ1
    and OpNE compare ignoring case; the remaining operators compare
    case-sensitively.
    """
    if op in (OpMatch, OpNotMatch):
        m = re.match(value, attr, re.I)
        matched = m is not None and m.end() == len(attr)
        if op == OpNotMatch:
            return not matched
        return matched
    if op in (OpEQ1, OpNE):
        if attr.lower() == value.lower():
            return op == OpEQ1 and 1 or 0
        return op == OpNE and 1 or 0
    d = cmp(attr, value)
    if d < 0:
        if op in (OpNE, OpLE, OpLT):
            return 1
    elif d == 0:
        if op in (OpEQ2, OpGE, OpLE):
            return 1
    else:
        if op in (OpNE, OpGE, OpGT):
            return 1
    return 0
for st in types.StringTypes:
    registerTest(st, _stringTest)

def _numberTest(attr, op, value):
    """
    Test a numeric attribute against 'value' converted to a float.
    Match operators never succeed.  A 'value' that is not a number
    makes the conversion raise ValueError.
    """
    if op in (OpMatch, OpNotMatch):
        return 0
    value = string.atof(value)
    d = cmp(attr, value)
    if d < 0:
        if op in (OpNE, OpLE, OpLT):
            return 1
    elif d == 0:
        if op in (OpEQ1, OpEQ2, OpGE, OpLE):
            return 1
    else:
        if op in (OpNE, OpGE, OpGT):
            return 1
    return 0
registerTest(int, _numberTest)
registerTest(long, _numberTest)
registerTest(float, _numberTest)
import numpy
for bits in ["16", "32", "64", "128"]:
    for base in ["int", "uint", "float"]:
        try:
            registerTest(getattr(numpy, base + bits), _numberTest)
        except AttributeError:
            # this numpy build has no such type
            pass

def _instanceTest(inst, op, value):
    """
    Test an old-style class instance: use the test function
    registered for its class if there is one, otherwise compare
    str(inst) with 'value'.
    """
    try:
        return _classTestDict[inst.__class__](inst, op, value)
    except KeyError:
        inst = str(inst)
    d = cmp(inst, value)
    if d < 0:
        if op in (OpNE, OpLE, OpLT):
            return 1
    elif d == 0:
        if op in (OpEQ1, OpEQ2, OpGE, OpLE):
            return 1
    else:
        if op in (OpNE, OpGE, OpGT):
            return 1
    return 0
registerTest(types.InstanceType, _instanceTest)

#
# Test code
#
if __name__ == '__main__':
    def testString(s):
        # print the code generated for 's' and check that it compiles
        print('parsing %s' % repr(s))
        try:
            p = Parser(s, 'graphFunc', 'subgraphFunc', 'vertexFunc')
            print('Parse code:')
            print('')
            print(p.code)
            print(compile(p.code, '', 'exec'))
        except SyntaxError as v:
            print('Parse error: %s' % (v,))
    if 1:
        testString('#0:90.A@CA:91.A@CA:92.A@CA:93.A@CA')
        testString('#1:90.A@CA:91.A@CA:92.A@CA:93.A')
    else:
        testString('#0:12:14')
        testString(':12@ca@cb')
        testString('#/number=0@/name=ca')
        testString('#0:12@ca #1@cb')
    if 0:
        import sys
        while 1:
            sys.stdout.write('> ')
            s = sys.stdin.readline()
            if not s:
                break
            testString(s)