}. Call C{withAttribute} with a series of attribute names and values. Specify the list of filter attributes names and values as: - keyword arguments, as in C{(align="right")}, or - as an explicit dict with C{**} operator, when an attribute name is also a Python reserved word, as in C{**{"class":"Customer", "align":"right"}} - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) For attribute names with a namespace prefix, you must use the second form. Attribute names are matched insensitive to upper/lower case. To verify that the attribute exists, but without specifying a value, pass C{withAttribute.ANY_VALUE} as the value. """ if args: attrs = args[:] else: attrs = attrDict.items() attrs = [(k,v) for k,v in attrs] def pa(s,l,tokens): for attrName,attrValue in attrs: if attrName not in tokens: raise ParseException(s,l,"no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % (attrName, tokens[attrName], attrValue)) return pa withAttribute.ANY_VALUE = object() opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached to operator expressions. Parameters: - baseExpr - expression representing the most basic element for the nested - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form (opExpr, numTerms, rightLeftAssoc, parseAction), where: - opExpr is the pyparsing expression for the operator; may also be a string, which will be converted to a Literal; if numTerms is 3, opExpr is a tuple of two expressions, for the two operators separating the 3 terms - numTerms is the number of terms for this operator (must be 1, 2, or 3) - rightLeftAssoc is the indicator whether the operator is right or left associative, using the pyparsing-defined constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - parseAction is the parse action to be associated with expressions matching this operator expression (the parse action tuple member may be omitted) - lpar - expression for matching left-parentheses (default=Suppress('(')) - rpar - expression for matching right-parentheses (default=Suppress(')')) """ ret = Forward() lastExpr = baseExpr | ( lpar + ret + rpar ) for i,operDef in enumerate(opList): opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] if arity == 3: if opExpr is None or len(opExpr) != 2: raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") opExpr1, opExpr2 = opExpr thisExpr = Forward()#.setName("expr%d" % i) if rightLeftAssoc == opAssoc.LEFT: if arity == 1: matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) elif arity == 2: if opExpr is not None: matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) else: matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) elif arity == 3: matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") elif rightLeftAssoc == opAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Optional): opExpr = Optional(opExpr) matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) elif arity == 2: if opExpr is not None: matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) else: matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) elif arity == 3: matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") else: raise ValueError("operator must indicate right or left associativity") if pa: matchExpr.setParseAction( pa ) thisExpr << ( matchExpr | lastExpr ) lastExpr = thisExpr ret << lastExpr return ret operatorPrecedence = infixNotation dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") unicodeString = Combine(_L('u') + quotedString.copy()) def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): """Helper method for defining nested lists enclosed in opening and closing delimiters ("(" and ")" are the default). Parameters: - opener - opening character for a nested list (default="("); can also be a pyparsing expression - closer - closing character for a nested list (default=")"); can also be a pyparsing expression - content - expression for items within the nested lists (default=None) - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) If an expression is not provided for the content argument, the nested expression will capture all whitespace-delimited content between delimiters as a list of separate values. Use the C{ignoreExpr} argument to define expressions that may contain opening or closing characters that should not be treated as opening or closing characters for nesting, such as quotedString or a comment expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. The default is L{quotedString}, but if no expressions are to be ignored, then pass C{None} for this argument. """ if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: if isinstance(opener,basestring) and isinstance(closer,basestring): if len(opener) == 1 and len(closer)==1: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS ).setParseAction(lambda t:t[0].strip())) else: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: raise ValueError("opening and closing arguments must be strings if no content expression is given") ret = Forward() if ignoreExpr is not None: ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) else: ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) return ret def indentedBlock(blockStatementExpr, indentStack, indent=True): """Helper method for defining space-delimited indentation blocks, such as those used to define block statements in Python source code. Parameters: - blockStatementExpr - expression defining syntax of statement that is repeated within the indented block - indentStack - list created by caller to manage indentation stack (multiple statementWithIndentedBlock expressions within a single grammar should share a common indentStack) - indent - boolean indicating whether block must be indented beyond the the current level; set to False for block of left-most statements (default=True) A valid block must contain at least one C{blockStatement}. """ def checkPeerIndent(s,l,t): if l >= len(s): return curCol = col(l,s) if curCol != indentStack[-1]: if curCol > indentStack[-1]: raise ParseFatalException(s,l,"illegal nesting") raise ParseException(s,l,"not a peer entry") def checkSubIndent(s,l,t): curCol = col(l,s) if curCol > indentStack[-1]: indentStack.append( curCol ) else: raise ParseException(s,l,"not a subentry") def checkUnindent(s,l,t): if l >= len(s): return curCol = col(l,s) if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): raise ParseException(s,l,"not an unindent") indentStack.pop() NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) INDENT = Empty() + Empty().setParseAction(checkSubIndent) PEER = Empty().setParseAction(checkPeerIndent) UNDENT = Empty().setParseAction(checkUnindent) if indent: smExpr = Group( Optional(NL) + #~ FollowedBy(blockStatementExpr) + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) else: smExpr = Group( Optional(NL) + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) blockStatementExpr.ignore(_bslash + LineEnd()) return smExpr alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None # it's easy to get these comment structures wrong - they're very common, so may as well make them available cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") htmlComment = Regex(r"") restOfLine = Regex(r".*").leaveWhitespace() dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) print ("tokens = " + str(tokens)) print ("tokens.columns = " + str(tokens.columns)) print ("tokens.tables = " + str(tokens.tables)) print (tokens.asXML("SQL",True)) except ParseBaseException as err: print (teststring + "->") print (err.line) print (" "*(err.column-1) + "^") print (err) print() selectToken = CaselessLiteral( "select" ) fromToken = CaselessLiteral( "from" ) ident = Word( alphas, alphanums + "_$" ) columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) columnNameList = Group( delimitedList( columnName ) )#.setName("columns") tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) tableNameList = Group( delimitedList( tableName ) )#.setName("tables") simpleSQL = ( selectToken + \ ( '*' | columnNameList ).setResultsName( "columns" ) + \ fromToken + \ tableNameList.setResultsName( "tables" ) ) test( "SELECT * from XYZZY, ABC" ) test( "select * from SYS.XYZZY" ) test( "Select A from Sys.dual" ) test( "Select AA,BB,CC from Sys.dual" ) test( "Select A, B, C from Sys.dual" ) test( "Select A, B, C from Sys.dual" ) test( "Xelect A, B, C from Sys.dual" ) test( "Select A, B, C frox Sys.dual" ) test( "Select" ) test( "Select ^^^ frox Sys.dual" ) test( "Select A, B, C from Sys.dual, Table2 " )