# This software and supporting documentation are distributed by # Institut Federatif de Recherche 49 # CEA/NeuroSpin, Batiment 145, # 91191 Gif-sur-Yvette cedex # France # # This software is governed by the CeCILL license version 2 under # French law and abiding by the rules of distribution of free software. # You can use, modify and/or redistribute the software under the # terms of the CeCILL license version 2 as circulated by CEA, CNRS # and INRIA at the following URL "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license version 2 and that you accept its terms. import re, types #------------------------------------------------------------------------------ class DictPattern: '''A DictPattern contains a pattern that is matched against a string and a set of attributes contained in a dictionary. When the match succeed, it returns a dictionary containing attributes values extracted from the input string. Such patterns are used to define Brainvisa ontology rules which associate filenames and data types. The input pattern is a string that is splitted in three kinds of tokens: * An attribute name enclosed in ``<`` and ``>`` * A named regular expression enclosed in ``{`` and ``}`` * A string literal which is everything not enclosed neither in braces nor with ``<`` and ``>``. When ``DictPattern.match( s, dict )`` is called, all attribute names from the pattern are replaced by the corresponding value in dict. If the dict does not contain the attribute, the match fails. Then, the string is matched against the pattern. If the pattern contains named regular expressions, the values corresponding to each expression is put in the resulting dictionary. If the match succeed, a dictionary (possibly empty) is returned. Otherwise, ``None`` is returned. In the string literal of the pattern, special characters can be found: * ``*`` matches any string and the matched value is associated to a ``filename_variable`` key in the results dictionary. * ``#`` matches any number and the matches value is associated to a ``name_serie`` key in the results dictionary. :Match examples: :: p = DictPattern( '_t1' ) p.match( 's_t1', { 'subject': 's' } ) == {} p.match( 's_t1', { 'subject': 'x' } ) == None p.match( 's_t2', { 'subject': 's' } ) == None p = DictPattern( '{subject}_t1' ) p.match( 's_t1', { 'subject': 's' } ) == {'subject': 's'} p.match( 'tutu_t1', { 'subject': 's' } ) == {'subject': 'tutu'} p.match( 's_t2', { 'subject': 's' } ) == None p = DictPattern( '*_#' ) p.match( 'toto_titi', {} ) == None p.match( 'toto_42', {} ) == {'name_serie':'42', 'filename_variable':'toto'} p = DictPattern( 'begin*__*end' ) p.match( 'beginxxx_anatole_yyyend', { 'subject': 'anatole' } ) == None p.match( 'beginxxx_anatole_xxxend', { 'subject': 'anatole' } ) == { 'filename_variable': 'xxx' } p.match( 'beginxxx_anatole_xxxend', { 'subject': 'raymond' } ) == None p = DictPattern( 'begin#__#end': ( p.match( 'beginxxx_anatole_xxxend', { 'subject': 'anatole' } ) == None p.match( 'begin123_anatole_456end', { 'subject': 'anatole' } ) == None p.match( 'begin123_anatole_123end', { 'subject': 'anatole' } ) == { 'name_serie': '123' } p.match( 'begin123_anatole_123end', { 'subject': 'raymond' } ) == None :Unmatch example: >>> DictPattern.unmatch( matchResult, dict ) This allows to build the string that would produce matchResult if ``DictPattern.match( s, dict )`` is succesfully used. The unmatch always succeed if matchResult is not None, in this case, we have : >>> DictPattern.match( DictPattern.match( s, dict ), dict ) == s ''' class Constant: pass def __repr__( self ): return str( self ) class Litteral( Constant ): def __init__( self, s ): self.__litteral = s def __call__( self, dict={}, matchResult=None ): return self.__litteral def escaped( self ): return DictPattern.EscapedLitteral( self.__litteral ) def __str__( self ): return repr(self.__litteral) class Attribute( Constant ): def __init__( self, s ): self.__key = s def __call__( self, dict, matchResult=None ): try: return dict[ self.__key ] except KeyError, ke: splitted = self.__key.split( '.' ) if len( splitted ) > 1: stack = splitted[:] try: result = dict while stack: result = result[ stack.pop( 0 ) ] return result except KeyError: if not matchResult: raise ke try: result = matchResult while splitted: result = result[ splitted.pop( 0 ) ] if not result: raise ke return result except: raise ke else: if not matchResult: raise ke try: result = matchResult[ self.__key ] if not result: raise ke return result except KeyError: raise ke def escaped( self ): return DictPattern.EscapedAttribute( self.__key ) def __str__( self ): return '' class EscapedLitteral( Litteral ): def __init__( self, s ): DictPattern.Litteral.__init__( self, re.escape( s ) ) class EscapedAttribute( Attribute ): def __call__( self, dict ): return re.escape( DictPattern.Attribute.__call__( self, dict ) ) def __str__( self ): return '' class MatchResult( Constant ): def __init__( self, s ): self.__key = s self.__splittedKey = s.split( '.' ) def __call__( self, dict, matchResult ): if matchResult is None: raise KeyError( self.__key ) # search for matchResult["att1.att2..."] result=matchResult.get(self.__key, None) if type(result) is list and result: # case of name_serie result=result[0] if result: return result # if not found, search for matchResult["att1"][att2]... try: stack = self.__splittedKey[:] result = matchResult while stack: result = result[ stack.pop( 0 ) ] if result: return result except KeyError: pass # if not found, search in dict result=dict.get(self.__key, None) if result: return result try: stack = self.__splittedKey[:] result = dict while stack: result = result[ stack.pop( 0 ) ] if not result: raise KeyError( self.__key ) if result: return result raise KeyError( self.__key ) except KeyError: raise KeyError( self.__key ) def __str__( self ): return '' class RegexpMatch: def __init__( self, matchList ): precompile = True for i in matchList: if isinstance( i, DictPattern.Attribute ): precompile = False break if precompile: # Regex is constant and can be precompiled attributeToGroupname = {} self.groupnameToAttributes = {} matchRegexp = '' for i in matchList: if type( i ) is types.TupleType: attribute, regexp = i groupname = attributeToGroupname.get( attribute ) if groupname: matchRegexp += '(?P=' + groupname + ')' else: if attribute.find( '.' ) == -1: groupname = attribute attributeToGroupname[ attribute ] = groupname self.groupnameToAttributes[ groupname ] = ( attribute, ) else: s = attribute.split( '.' ) groupname = '__dict_' + '_'.join( s ) + '__' attributeToGroupname[ attribute ] = groupname self.groupnameToAttributes[ groupname ] = s matchRegexp += '(?P<' + groupname + '>' + regexp + ')' else: matchRegexp += re.escape( i() ) self.__regexp = matchRegexp self.__match = re.compile( matchRegexp ) else: self.__regexp = None attributeToGroupname = {} self.groupnameToAttributes = {} self.__match = [] for i in matchList: if type( i ) is types.TupleType: attribute, regexp = i groupname = attributeToGroupname.get( attribute ) if groupname: self.__match.append( DictPattern.Litteral( '(?P=' + groupname + ')' ) ) else: if attribute.find( '.' ) == -1: groupname = attribute attributeToGroupname[ attribute ] = groupname self.groupnameToAttributes[ groupname ] = ( attribute, ) else: s = attribute.split( '.' ) groupname = '__dict_' + '_'.join( s ) + '__' attributeToGroupname[ attribute ] = groupname self.groupnameToAttributes[ groupname ] = s self.__match.append( DictPattern.Litteral( '(?P<' + groupname + '>' + regexp + ')' ) ) else: self.__match.append( i.escaped() ) def match( self, s, dict ): if self.__regexp is None: m = re.compile( ''.join( [i( dict) for i in self.__match] ) ).match( s ) else: m = self.__match.match( s ) if m: result = {} for groupname, value in m.groupdict().iteritems(): attributes = self.groupnameToAttributes[ groupname ] d = result for k in attributes[ :-1 ]: d = d.setdefault( k, {} ) d[ attributes[ -1 ] ] = value return result return None def __str__( self ): if self.__regexp is None: return '' else: return '' def __repr__( self ): return str( self ) def __init__( self, pattern ): self.pattern = pattern splitNamedPattern = re.compile( r'([^<>{]*){([^{}<>]*)}(.*)' ) splitAttribute = re.compile( r'([^{}<]*)<([^{}<>]*)>(.*)' ) check = re.compile( r'.*[{}<>].*' ) matchList = [] unmatchList = [] while pattern: m = splitAttribute.match( pattern ) if m: litteral = m.group( 1 ) if litteral: if check.match( litteral ): raise ValueError( 'Invalid pattern' ) ma, un = DictPattern._replaceStarAndSharp( litteral ) matchList += ma unmatchList += un attribute = m.group(2) r = DictPattern.Attribute( attribute ) matchList.append( r ) unmatchList.append( r ) pattern = m.group( 3 ) continue m = splitNamedPattern.match( pattern ) if m: litteral = m.group( 1 ) if litteral: if check.match( litteral ): raise ValueError( 'Invalid pattern' ) ma, un = DictPattern._replaceStarAndSharp( litteral ) matchList += ma unmatchList += un patternName = m.group(2) i = patternName.find( '|' ) if i >= 0: pattern = n[ i+1: ] patternName = patternName[ :i ] else: pattern = '.+' matchList.append( ( patternName, pattern ) ) unmatchList.append( DictPattern.MatchResult( patternName ) ) pattern = m.group( 3 ) continue if check.match( pattern ): raise ValueError( 'Invalid pattern' ) ma, un = DictPattern._replaceStarAndSharp( pattern ) matchList += ma unmatchList += un break self.unmatchList = unmatchList onlyLitteral = True self.matchPrefix = [] while matchList and isinstance( matchList[0], DictPattern.Constant ): i = matchList.pop(0) self.matchPrefix.append( i ) if not isinstance( i, DictPattern.Litteral ): onlyLitteral = False if onlyLitteral: self.matchPrefix = ''.join( [i({}) for i in self.matchPrefix] ) onlyLitteral = True self.matchSufix = [] while matchList and isinstance( matchList[-1], DictPattern.Constant ): i = matchList.pop() self.matchSufix.insert( 0, i ) if not isinstance( i, DictPattern.Litteral ): onlyLitteral = False if onlyLitteral: self.matchSufix = ''.join( [i({}) for i in self.matchSufix] ) if matchList: self.matchInfix = DictPattern.RegexpMatch( matchList ) else: self.matchInfix = None def _replaceSharp( s ): l = s.split( '#' ) match = [ DictPattern.Litteral( l[0] ) ] unmatch = [ DictPattern.Litteral( l[0] ) ] l.pop( 0 ) while l: i = l.pop( 0 ) litteral = DictPattern.Litteral( i ) match += [ ( 'name_serie', '[0-9]+' ), litteral ] unmatch += [ DictPattern.MatchResult( 'name_serie' ), litteral ] return match, unmatch _replaceSharp = staticmethod( _replaceSharp ) def _replaceStarAndSharp( s ): l = s.split( '*' ) match, unmatch = DictPattern._replaceSharp( l[0] ) l.pop( 0 ) while l: i = l.pop( 0 ) match.append( ( 'filename_variable', '.+' ) ) unmatch.append( DictPattern.MatchResult( 'filename_variable' ) ) m, u = DictPattern._replaceSharp( i ) match += m unmatch += u return match, unmatch _replaceStarAndSharp = staticmethod( _replaceStarAndSharp ) def __getstate__( self ): return self.pattern def __setstate__( self, state ): self.__init__( state ) def match( self, s, dict ): """ Checks if the given string matches the pattern. :param string s: the string which should match the pattern :param dict: a dictionary containing the values to set to the attributes named in the pattern. :returns: a dictionary containing the value of each named expression of the pattern found in the string, None if the string doesn't match the pattern. """ try: if self.matchPrefix: if isinstance( self.matchPrefix, list ): prefix = ''.join( [i( dict ) for i in self.matchPrefix] ) else: prefix = self.matchPrefix if not s.startswith( prefix ): return None s = s[ len( prefix ): ] if self.matchSufix: if isinstance( self.matchSufix, list ): sufix = ''.join( [i( dict ) for i in self.matchSufix] ) else: sufix = self.matchSufix if not s.endswith( sufix ): return None s = s[ :-len( sufix ) ] if self.matchInfix is not None: return self.matchInfix.match( s, dict ) if s: return None return {} except KeyError: return None def unmatch( self, matchResult, dict ): """ The opposite of :py:meth:`match` method: the matching string is found from a match result and a dictionary of attributes values. :param matchResult: dictionary which associates a value to each named expression of the pattern. :param dict: dictionary which associates a value to each attribute name of the pattern. :rtype: string :returns: the rebuilt matching string. """ try: return ''.join( [i( dict, matchResult ) for i in self.unmatchList] ) except KeyError, e: #print '!unmatch!', e return None def multipleUnmatch( self, dict ): #print '!multipleUnmatch!', self, dict # Retrieve attributes() and namedRegex() attributes = [] for i in self.unmatchList: if isinstance( i, DictPattern.Attribute ): if i._Attribute__key not in attributes: attributes.append( i._Attribute__key ) elif isinstance( i, DictPattern.MatchResult ): if i._MatchResult__key not in attributes: attributes.append( i._MatchResult__key ) #print '!multipleUnmatch! attributes =', attributes # Check attributes values that are list multipleValues = [] for a in attributes: if a == 'name_serie': continue v = dict.get( a ) if isinstance( v, list ): if multipleValues: newMultipleValues = [] for d in multipleValues: l = [ {a: i} for i in v ] for d2 in l: d2.update( d ) newMultipleValues.extend( l ) multipleValues = newMultipleValues else: multipleValues = [ {a: i} for i in v ] #print '!multipleUnmatch! multipleValues =', multipleValues if multipleValues: result = [] d = dict.copy() for d2 in multipleValues: d.update( d2 ) r = self.unmatch( d, d ) #print '!multipleUnmatch! call unmatch', d, '-->', r if r: result.append( ( r, d2 ) ) #print '!multipleUnmatch! -->', result return result else: r = self.unmatch( dict, dict ) if r: #print '!multipleUnmatch! -->', [ ( r, {} ) ] return [ ( r, {} ) ] #print '!multipleUnmatch! -->', [] return [] def attributes( self ): sent = [] for i in self.unmatchList: if isinstance( i, DictPattern.Attribute ): if i._Attribute__key not in sent: sent.append( i._Attribute__key ) yield i._Attribute__key def namedRegex( self ): sent = [] for i in self.unmatchList: if isinstance( i, DictPattern.MatchResult ): if i._MatchResult__key not in sent: sent.append( i._MatchResult__key ) yield i._MatchResult__key def __str__( self ): # return '' return '' def __repr__( self ): return self.__str__() def __eq__( self, other ): return self.pattern == getattr( other, 'pattern', other ) def __ne__( self, other ): return self.pattern != getattr( other, 'pattern', other ) def __hash__( self ): return hash( self.pattern )