from __future__ import absolute_import, print_function, division import copy import sys from theano.compat import DefaultOrderedDict from theano.misc.ordered_set import OrderedSet from six import StringIO, integer_types from theano.gof import opt from theano import config class DB(object): def __hash__(self): if not hasattr(self, '_optimizer_idx'): self._optimizer_idx = opt._optimizer_idx[0] opt._optimizer_idx[0] += 1 return self._optimizer_idx def __init__(self): self.__db__ = DefaultOrderedDict(OrderedSet) self._names = set() self.name = None # will be reset by register # (via obj.name by the thing doing the registering) def register(self, name, obj, *tags, **kwargs): """ Parameters ---------- name : str Name of the optimizer. obj The optimizer to register. tags Tag name that allow to select the optimizer. kwargs If non empty, should contain only use_db_name_as_tag=False. By default, all optimizations registered in EquilibriumDB are selected when the EquilibriumDB name is used as a tag. We do not want this behavior for some optimizer like local_remove_all_assert. use_db_name_as_tag=False remove that behavior. This mean only the optimizer name and the tags specified will enable that optimization. """ # N.B. obj is not an instance of class Optimizer. # It is an instance of a DB.In the tests for example, # this is not always the case. if not isinstance(obj, (DB, opt.Optimizer, opt.LocalOptimizer)): raise TypeError('Object cannot be registered in OptDB', obj) if name in self.__db__: raise ValueError('The name of the object cannot be an existing' ' tag or the name of another existing object.', obj, name) if kwargs: assert "use_db_name_as_tag" in kwargs assert kwargs["use_db_name_as_tag"] is False else: if self.name is not None: tags = tags + (self.name,) obj.name = name # This restriction is there because in many place we suppose that # something in the DB is there only once. if obj.name in self.__db__: raise ValueError('''You can\'t register the same optimization multiple time in a DB. Tryed to register "%s" again under the new name "%s". Use theano.gof.ProxyDB to work around that''' % (obj.name, name)) self.__db__[name] = OrderedSet([obj]) self._names.add(name) self.__db__[obj.__class__.__name__].add(obj) self.add_tags(name, *tags) def add_tags(self, name, *tags): obj = self.__db__[name] assert len(obj) == 1 obj = obj.copy().pop() for tag in tags: if tag in self._names: raise ValueError('The tag of the object collides with a name.', obj, tag) self.__db__[tag].add(obj) def remove_tags(self, name, *tags): obj = self.__db__[name] assert len(obj) == 1 obj = obj.copy().pop() for tag in tags: if tag in self._names: raise ValueError('The tag of the object collides with a name.', obj, tag) self.__db__[tag].remove(obj) def __query__(self, q): if not isinstance(q, Query): raise TypeError('Expected a Query.', q) # The ordered set is needed for deterministic optimization. variables = OrderedSet() for tag in q.include: variables.update(self.__db__[tag]) for tag in q.require: variables.intersection_update(self.__db__[tag]) for tag in q.exclude: variables.difference_update(self.__db__[tag]) remove = OrderedSet() add = OrderedSet() for obj in variables: if isinstance(obj, DB): def_sub_query = q if q.extra_optimizations: def_sub_query = copy.copy(q) def_sub_query.extra_optimizations = [] sq = q.subquery.get(obj.name, def_sub_query) replacement = obj.query(sq) replacement.name = obj.name remove.add(obj) add.add(replacement) variables.difference_update(remove) variables.update(add) return variables def query(self, *tags, **kwtags): if len(tags) >= 1 and isinstance(tags[0], Query): if len(tags) > 1 or kwtags: raise TypeError('If the first argument to query is a Query,' ' there should be no other arguments.', tags, kwtags) return self.__query__(tags[0]) include = [tag[1:] for tag in tags if tag.startswith('+')] require = [tag[1:] for tag in tags if tag.startswith('&')] exclude = [tag[1:] for tag in tags if tag.startswith('-')] if len(include) + len(require) + len(exclude) < len(tags): raise ValueError("All tags must start with one of the following" " characters: '+', '&' or '-'", tags) return self.__query__(Query(include=include, require=require, exclude=exclude, subquery=kwtags)) def __getitem__(self, name): variables = self.__db__[name] if not variables: raise KeyError("Nothing registered for '%s'" % name) elif len(variables) > 1: raise ValueError('More than one match for %s (please use query)' % name) for variable in variables: return variable def __contains__(self, name): return name in self.__db__ def print_summary(self, stream=sys.stdout): print("%s (id %i)" % (self.__class__.__name__, id(self)), file=stream) print(" names", self._names, file=stream) print(" db", self.__db__, file=stream) class Query(object): """ Parameters ---------- position_cutoff : float Used by SequenceDB to keep only optimizer that are positioned before the cut_off point. """ def __init__(self, include, require=None, exclude=None, subquery=None, position_cutoff=float('inf'), extra_optimizations=None): self.include = OrderedSet(include) self.require = require or OrderedSet() self.exclude = exclude or OrderedSet() self.subquery = subquery or {} self.position_cutoff = position_cutoff if extra_optimizations is None: extra_optimizations = [] self.extra_optimizations = extra_optimizations if isinstance(self.require, (list, tuple)): self.require = OrderedSet(self.require) if isinstance(self.exclude, (list, tuple)): self.exclude = OrderedSet(self.exclude) def __str__(self): return ("Query{inc=%s,ex=%s,require=%s,subquery=%s," "position_cutoff=%f,extra_opts=%s}" % (self.include, self.exclude, self.require, self.subquery, self.position_cutoff, self.extra_optimizations)) def __setstate__(self, state): self.__dict__.update(state) if not hasattr(self, 'extra_optimizations'): self.extra_optimizations = [] # add all opt with this tag def including(self, *tags): return Query(self.include.union(tags), self.require, self.exclude, self.subquery, self.position_cutoff, self.extra_optimizations) # remove all opt with this tag def excluding(self, *tags): return Query(self.include, self.require, self.exclude.union(tags), self.subquery, self.position_cutoff, self.extra_optimizations) # keep only opt with this tag. def requiring(self, *tags): return Query(self.include, self.require.union(tags), self.exclude, self.subquery, self.position_cutoff, self.extra_optimizations) def register(self, *optimizations): return Query(self.include, self.require, self.exclude, self.subquery, self.position_cutoff, self.extra_optimizations + list(optimizations)) class EquilibriumDB(DB): """ A set of potential optimizations which should be applied in an arbitrary order until equilibrium is reached. Canonicalize, Stabilize, and Specialize are all equilibrium optimizations. Parameters ---------- ignore_newtrees If False, we will apply local opt on new node introduced during local optimization application. This could result in less fgraph iterations, but this doesn't mean it will be faster globally. tracks_on_change_inputs If True, we will re-apply local opt on nodes whose inputs changed during local optimization application. This could result in less fgraph iterations, but this doesn't mean it will be faster globally. Notes ----- We can put LocalOptimizer and Optimizer as EquilibriumOptimizer suppor both. It is probably not a good idea to have ignore_newtrees=False and tracks_on_change_inputs=True """ def __init__(self, ignore_newtrees=True, tracks_on_change_inputs=False): super(EquilibriumDB, self).__init__() self.ignore_newtrees = ignore_newtrees self.tracks_on_change_inputs = tracks_on_change_inputs self.__final__ = {} self.__cleanup__ = {} def register(self, name, obj, *tags, **kwtags): final_opt = kwtags.pop('final_opt', False) cleanup = kwtags.pop('cleanup', False) # An opt should not be final and clean up assert not (final_opt and cleanup) super(EquilibriumDB, self).register(name, obj, *tags, **kwtags) self.__final__[name] = final_opt self.__cleanup__[name] = cleanup def query(self, *tags, **kwtags): _opts = super(EquilibriumDB, self).query(*tags, **kwtags) final_opts = [o for o in _opts if self.__final__.get(o.name, False)] cleanup_opts = [o for o in _opts if self.__cleanup__.get(o.name, False)] opts = [o for o in _opts if o not in final_opts and o not in cleanup_opts] if len(final_opts) == 0: final_opts = None if len(cleanup_opts) == 0: cleanup_opts = None return opt.EquilibriumOptimizer( opts, max_use_ratio=config.optdb.max_use_ratio, ignore_newtrees=self.ignore_newtrees, tracks_on_change_inputs=self.tracks_on_change_inputs, failure_callback=opt.NavigatorOptimizer.warn_inplace, final_optimizers=final_opts, cleanup_optimizers=cleanup_opts) class SequenceDB(DB): """ A sequence of potential optimizations. Retrieve a sequence of optimizations (a SeqOptimizer) by calling query(). Each potential optimization is registered with a floating-point position. No matter which optimizations are selected by a query, they are carried out in order of increasing position. The optdb itself (`theano.compile.mode.optdb`), from which (among many other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB. """ seq_opt = opt.SeqOptimizer def __init__(self, failure_callback=opt.SeqOptimizer.warn): super(SequenceDB, self).__init__() self.__position__ = {} self.failure_callback = failure_callback def register(self, name, obj, position, *tags): super(SequenceDB, self).register(name, obj, *tags) if position == 'last': if len(self.__position__) == 0: self.__position__[name] = 0 else: self.__position__[name] = max(self.__position__.values()) + 1 else: assert isinstance(position, (integer_types, float)) self.__position__[name] = position def query(self, *tags, **kwtags): """ Parameters ---------- position_cutoff : float or int Only optimizations with position less than the cutoff are returned. """ opts = super(SequenceDB, self).query(*tags, **kwtags) position_cutoff = kwtags.pop('position_cutoff', config.optdb.position_cutoff) position_dict = self.__position__ if len(tags) >= 1 and isinstance(tags[0], Query): # the call to super should have raise an error with a good message assert len(tags) == 1 if getattr(tags[0], 'position_cutoff', None): position_cutoff = tags[0].position_cutoff # The Query instance might contain extra optimizations which need # to be added the the sequence of optimizations (don't alter the # original dictionary) if len(tags[0].extra_optimizations) > 0: position_dict = position_dict.copy() for extra_opt in tags[0].extra_optimizations: # Give a name to the extra optimization (include both the # class name for descriptiveness and id to avoid name # collisions) opt, position = extra_opt opt.name = "%s_%i" % (opt.__class__, id(opt)) # Add the extra optimization to the optimization sequence if position < position_cutoff: opts.add(opt) position_dict[opt.name] = position opts = [o for o in opts if position_dict[o.name] < position_cutoff] opts.sort(key=lambda obj: (position_dict[obj.name], obj.name)) kwargs = {} if self.failure_callback: kwargs["failure_callback"] = self.failure_callback ret = self.seq_opt(opts, **kwargs) if hasattr(tags[0], 'name'): ret.name = tags[0].name return ret def print_summary(self, stream=sys.stdout): print(self.__class__.__name__ + " (id %i)" % id(self), file=stream) positions = list(self.__position__.items()) def c(a, b): return ((a[1] > b[1]) - (a[1] < b[1])) positions.sort(c) print(" position", positions, file=stream) print(" names", self._names, file=stream) print(" db", self.__db__, file=stream) def __str__(self): sio = StringIO() self.print_summary(sio) return sio.getvalue() class LocalGroupDB(DB): """ Generate a local optimizer of type LocalOptGroup instead of a global optimizer. It supports the tracks, to only get applied to some Op. """ def __init__(self, apply_all_opts=False, profile=False, local_opt=opt.LocalOptGroup): super(LocalGroupDB, self).__init__() self.failure_callback = None self.apply_all_opts = apply_all_opts self.profile = profile self.__position__ = {} self.local_opt = local_opt def register(self, name, obj, *tags, **kwargs): super(LocalGroupDB, self).register(name, obj, *tags) position = kwargs.pop('position', 'last') if position == 'last': if len(self.__position__) == 0: self.__position__[name] = 0 else: self.__position__[name] = max(self.__position__.values()) + 1 else: assert isinstance(position, (integer_types, float)) self.__position__[name] = position def query(self, *tags, **kwtags): # For the new `useless` optimizer opts = list(super(LocalGroupDB, self).query(*tags, **kwtags)) opts.sort(key=lambda obj: (self.__position__[obj.name], obj.name)) ret = self.local_opt(*opts, apply_all_opts=self.apply_all_opts, profile=self.profile) return ret class TopoDB(DB): """ Generate a Global Optimizer of type TopoOptimizer. """ def __init__(self, db, order='in_to_out', ignore_newtrees=False, failure_callback=None): super(TopoDB, self).__init__() self.db = db self.order = order self.ignore_newtrees = ignore_newtrees self.failure_callback = failure_callback def query(self, *tags, **kwtags): return opt.TopoOptimizer(self.db.query(*tags, **kwtags), self.order, self.ignore_newtrees, self.failure_callback) class ProxyDB(DB): """ Wrap an existing proxy. This is needed as we can't register the same DB mutiple times in different positions in a SequentialDB. """ def __init__(self, db): assert isinstance(db, DB), "" self.db = db def query(self, *tags, **kwtags): return self.db.query(*tags, **kwtags)