""" .. warning:: This directory is for the internal of Theano. You are strongly advised not to use it, except if you know what you are doing! If you want to use a scalar variable in a Theano graph, you probably want to use theano.tensor.[c,z,f,d,b,w,i,l,]scalar! """ from __future__ import absolute_import, print_function, division from itertools import chain import math import warnings from copy import copy from textwrap import dedent import numpy as np import six from six.moves import xrange import theano from theano.compat import imap, izip from theano import gof, printing from theano.gof import (Op, utils, Variable, Constant, Type, Apply, FunctionGraph) from functools import partial from theano import config from theano.gradient import DisconnectedType from theano.gradient import grad_undefined from theano.printing import pprint import collections builtin_bool = bool builtin_complex = complex builtin_int = int builtin_float = float class ComplexError(NotImplementedError): """ Raised if complex numbers are used in an unsupported operation. """ pass class IntegerDivisionError(Exception): """ Raised if someone tries to divide integers with '/' instead of '//'. """ pass def upcast(dtype, *dtypes): # This tries to keep data in floatX or lower precision, unless we # explicitely request a higher precision datatype. keep_float32 = [(config.cast_policy == 'numpy+floatX' and config.floatX == 'float32')] keep_float16 = [(config.cast_policy == 'numpy+floatX' and config.floatX == 'float16')] def make_array(dt): if dt == 'float64': # There is an explicit float64 dtype: we cannot keep float32. keep_float32[0] = False keep_float16[0] = False if dt == 'float32': keep_float16[0] = False return np.zeros((), dtype=dt) z = make_array(dtype) for dt in dtypes: z = z + make_array(dt=dt) rval = str(z.dtype) if rval == 'float64': if keep_float16[0]: return 'float16' if keep_float32[0]: return 'float32' elif rval == 'float32': if keep_float16[0]: return 'float16' return rval def as_common_dtype(*vars): """ For for theano.scalar.Scalar and TensorVariable. """ dtype = upcast(*[v.dtype for v in vars]) return (v.astype(dtype) for v in vars) def get_scalar_type(dtype): """ Return a Scalar(dtype) object. This caches objects to save allocation and run time. """ if dtype not in get_scalar_type.cache: get_scalar_type.cache[dtype] = Scalar(dtype=dtype) return get_scalar_type.cache[dtype] get_scalar_type.cache = {} def as_scalar(x, name=None): from ..tensor import TensorType, scalar_from_tensor if isinstance(x, gof.Apply): if len(x.outputs) != 1: raise ValueError("It is ambiguous which output of a multi-output" " Op has to be fetched.", x) else: x = x.outputs[0] if isinstance(x, Variable): if isinstance(x.type, Scalar): return x elif isinstance(x.type, TensorType) and x.ndim == 0: return scalar_from_tensor(x) else: raise TypeError("Variable type field must be a Scalar.", x, x.type) try: return constant(x) except TypeError: raise TypeError("Cannot convert %s to Scalar" % x, type(x)) class NumpyAutocaster(object): """ This class is used to cast python ints and floats to numpy arrays. The behavior when called on scalar `x` depends on `config.cast_policy`: - 'numpy' will simply use the same type as found by `numpy.asarray(x)`. - 'numpy+floatX' will do the same, except it will use float32 instead of float64 if `x` is a Python float and `config.floatX` is set to 'float32' (note that if `x` is a numpy scalar whose data type is float64, it is not modified since we assume the user is purposely using float64). 
        - 'custom' lets one define a tuple of data types such that:
            - if `x` is already a numpy scalar and its data type is in this
              tuple, then it is returned unchanged;
            - otherwise, the first data type in this tuple that can represent
              `x` without loss of precision will be used, unless `x` is a
              float and 'float32' is in the tuple (in which case `x` is cast
              as a float32);
            - if no data type can represent `x` without loss of precision,
              then the last data type in the tuple will be used.

    Parameters
    ----------
    dtypes: tuple of strings
        The ordered list of preferred data types (only used when
        `config.cast_policy` is set to 'custom', see the `NumpyAutocaster`
        help for details).

    """

    def __init__(self, dtypes):
        self.dtypes = tuple(dtypes)

    def __call__(self, x):
        # Make sure we only deal with scalars.
        assert (isinstance(x, six.integer_types) or
                isinstance(x, builtin_float) or
                (isinstance(x, np.ndarray) and x.ndim == 0))

        if config.cast_policy == 'numpy':
            return np.asarray(x)
        elif config.cast_policy == 'numpy+floatX':
            rval = np.asarray(x)
            if ((not hasattr(x, 'dtype') and
                 rval.dtype in ('float64', 'float32') and
                 rval.dtype != config.floatX)):
                rval = theano._asarray(rval, dtype=config.floatX)
            return rval

        # The following is the original code, corresponding to the 'custom'
        # option for `config.cast_policy`.
        assert config.cast_policy == 'custom'

        try:
            # Pass through numpy scalars, since they are already typed on
            # purpose typically.
            if str(x.dtype) in self.dtypes:
                # No need to cast `x` into a new dtype. Note that we still
                # need to convert it into an array, because it may not be
                # one already (e.g. if x == numpy.float64(1.1)).
                return np.asarray(x)
        except AttributeError:
            # Means `x` has no 'dtype' attribute.
            pass

        # unsafe downcast of float64 variables when config.floatX == 'float32'
        # recall: float is numpy.float
        if ((isinstance(x, float) and
             config.floatX in self.dtypes and
             config.floatX != 'float64')):
            return theano._asarray(x, dtype=config.floatX)

        # Don't autocast to float16 unless config.floatX is float16
        try_dtypes = [d for d in self.dtypes
                      if config.floatX == 'float16' or d != 'float16']

        for dtype in try_dtypes:
            x_ = theano._asarray(x, dtype=dtype)
            if np.all(x == x_):
                break
        # returns either an exact x_==x, or the last cast x_
        return x_


autocast_int = NumpyAutocaster(('int8', 'int16', 'int32', 'int64'))
# autocast_float dtypes might be manipulated in tensor.*
autocast_float = NumpyAutocaster(('float16', 'float32', 'float64'))


class autocast_float_as(object):
    """
    Temporarily adjust autocasting behavior.

    This class makes it possible to temporarily and locally adjust autocasting
    behavior when `config.cast_policy` is set to 'custom'.
    If `config.cast_policy` is not 'custom', an exception is raised.

    This class might be convenient in some code, but it definitely
    helps to test the autocasting mechanism.

    Examples
    --------
    >>> with autocast_float_as('float32'):
    ...     assert (fvector() + 1.1).dtype == 'float32'  # temporary downcasting
    >>> assert (fvector() + 1.1).dtype == 'float64'  # back to default behaviour

    """
    def __init__(self, *dtypes):
        self.dtypes = dtypes
        assert config.cast_policy == 'custom'

    def __enter__(self):
        assert config.cast_policy == 'custom'
        self.old_dtypes = autocast_float.dtypes
        autocast_float.dtypes = self.dtypes

    def __exit__(self, *args):
        assert config.cast_policy == 'custom'
        autocast_float.dtypes = self.old_dtypes


def convert(x, dtype=None):
    """
    Convert the input to a properly typed numpy value according to the
    current casting policy. Work with scalars and tensors.
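
    For illustration only (the result depends on `config.cast_policy`; the
    values below assume the default 'custom' policy):

    >>> assert convert(1).dtype == 'int8'  # smallest int type that holds 1
    >>> assert convert(1, dtype='float32').dtype == 'float32'  # forced dtype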
""" if dtype is not None: # in this case, the semantics are that the caller is forcing the dtype x_ = theano._asarray(x, dtype=dtype) else: # In this case, this function should infer the dtype according to the # autocasting rules. See autocasting above. x_ = None if isinstance(x, six.integer_types): try: x_ = autocast_int(x) except OverflowError: # This is to imitate numpy behavior which tries to fit # bigger numbers into a uint64. x_ = theano._asarray(x, dtype='uint64') elif isinstance(x, builtin_float): x_ = autocast_float(x) elif isinstance(x, np.ndarray): x_ = x else: # Here x is probably a list or a tuple. If it contains a # long, we will behave like the current NumPy version: it # will work if the long fits in int64 or uint64. x_ = np.asarray(x) if x_.size == 0 and not hasattr(x, 'dtype'): x_ = np.asarray(x, dtype=config.floatX) assert type(x_) in [np.ndarray, np.memmap] return x_ def constant(x, name=None, dtype=None): x = convert(x, dtype=dtype) assert x.ndim == 0 return ScalarConstant(get_scalar_type(str(x.dtype)), x, name=name) class Scalar(Type): """ Internal class, should not be used by clients. Primarily used by tensor.elemwise and tensor.reduce. Analogous to TensorType, but for zero-dimensional objects. Maps directly to C primitives. TODO: refactor to be named ScalarType for consistency with TensorType. """ __props__ = ('dtype',) ndim = 0 def __init__(self, dtype): if dtype == 'floatX': dtype = config.floatX self.dtype = dtype self.dtype_specs() # error checking @staticmethod def may_share_memory(a, b): # This class represent basic c type, represented in python # with numpy.scalar. They are read only. So from python, they # can never share memory. return False def filter(self, data, strict=False, allow_downcast=None): py_type = self.dtype_specs()[0] if strict and not isinstance(data, py_type): raise TypeError("%s expected a %s, got %s of type %s" % ( self, py_type, data, type(data)), data) try: converted_data = py_type(data) if (allow_downcast or (allow_downcast is None and type(data) is float and self.dtype == theano.config.floatX) or data == converted_data): return py_type(data) else: raise TypeError('Value cannot accurately be converted to dtype' ' (%s) and allow_downcast is not True' % self.dtype) except Exception as e: raise TypeError("Could not convert %s (value=%s) to %s" % ( type(data), data, self.dtype), e) def values_eq_approx(self, a, b, tolerance=1e-4): # The addition have risk of overflow especially with [u]int8 if self.dtype == 'bool': return a == b diff = a - b if diff == 0: return True return abs(diff) <= (abs(a) * tolerance) + (abs(b) * tolerance) def c_element_type(self): return self.dtype_specs()[1] def c_headers(self, c_compiler): l = [''] # These includes are needed by Scalar and TensorType, # we declare them here and they will be re-used by TensorType l.append('') l.append('') if config.lib.amdlibm and c_compiler.supports_amdlibm: l += [''] return l def c_libraries(self, c_compiler): l = [] if config.lib.amdlibm and c_compiler.supports_amdlibm: l += ['amdlibm'] return l def c_compile_args(self, c_compiler): if config.lib.amdlibm and c_compiler.supports_amdlibm: return ['-DREPLACE_WITH_AMDLIBM'] else: return [] def dtype_specs(self): try: # To help debug dtype/typenum problem, here is code to get # the list of numpy typenum. This list change between 32 # and 64 bit platform and probably also also between # Windows and Linux. # NOTE: equivalent type on a platform can have different typenum. 
# This is the source of all dtype/typenum problem found up to # now, as Theano always expect the exact typenum that # correspond to our supported dtype. """ for dtype in ['bool', 'int8', 'uint8', 'short', 'ushort', 'intc', 'uintc', 'longlong', 'ulonglong', 'single', 'double', 'longdouble', 'csingle', 'cdouble', 'clongdouble', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', 'complex64', 'complex128', 'float', 'double', 'int', 'uint']: print(dtype, np.zeros(1, dtype=dtype).dtype.num) """ return { # dtype: (py_type, c_type, cls_name) 'float16': (np.float16, 'npy_float16', 'Float16'), 'float32': (np.float32, 'npy_float32', 'Float32'), 'float64': (np.float64, 'npy_float64', 'Float64'), 'complex128': (np.complex128, 'theano_complex128', 'Complex128'), 'complex64': (np.complex64, 'theano_complex64', 'Complex64'), 'bool': (np.bool_, 'npy_bool', 'Bool'), 'uint8': (np.uint8, 'npy_uint8', 'UInt8'), 'int8': (np.int8, 'npy_int8', 'Int8'), 'uint16': (np.uint16, 'npy_uint16', 'UInt16'), 'int16': (np.int16, 'npy_int16', 'Int16'), 'uint32': (np.uint32, 'npy_uint32', 'UInt32'), 'int32': (np.int32, 'npy_int32', 'Int32'), 'uint64': (np.uint64, 'npy_uint64', 'UInt64'), 'int64': (np.int64, 'npy_int64', 'Int64') }[self.dtype] except KeyError: raise TypeError("Unsupported dtype for %s: %s" % ( self.__class__.__name__, self.dtype)) def upcast(self, *others): return upcast(*[x.dtype for x in [self] + list(others)]) def make_variable(self, name=None): return ScalarVariable(self, name=name) def __str__(self): return str(self.dtype) def __repr__(self): return "Scalar(%s)" % self.dtype def c_literal(self, data): if 'complex' in self.dtype: raise NotImplementedError("No literal for complex values.") if self.dtype == 'bool': return '1' if data else '0' return str(data) def c_declare(self, name, sub, check_input=True): if(check_input): pre = """ typedef %(dtype)s dtype_%(name)s; """ % dict(name=name, dtype=self.dtype_specs()[1]) else: pre = "" return pre + """ %(dtype)s %(name)s; """ % dict(name=name, dtype=self.dtype_specs()[1]) def c_init(self, name, sub): return """ %(name)s = 0; """ % locals() def c_extract(self, name, sub, check_input=True): if self.dtype == 'float16': # This doesn't work at the numpy level raise NotImplementedError('float16') specs = self.dtype_specs() if(check_input): pre = """ if (!PyObject_TypeCheck(py_%(name)s, &%(pyarr_type)s)) { PyErr_Format(PyExc_ValueError, "Scalar check failed (%(dtype)s)"); %(fail)s } """ % dict(sub, name=name, dtype=specs[1], pyarr_type='Py%sArrType_Type' % specs[2]) else: pre = "" return pre + """ PyArray_ScalarAsCtype(py_%(name)s, &%(name)s); """ % dict(sub, name=name) def c_sync(self, name, sub): specs = self.dtype_specs() return """ Py_XDECREF(py_%(name)s); py_%(name)s = PyArrayScalar_New(%(cls)s); if (!py_%(name)s) { Py_XINCREF(Py_None); py_%(name)s = Py_None; PyErr_Format(PyExc_MemoryError, "Instantiation of new Python scalar failed (%(dtype)s)"); %(fail)s } PyArrayScalar_ASSIGN(py_%(name)s, %(cls)s, %(name)s); """ % dict(sub, name=name, dtype=specs[1], cls=specs[2]) def c_cleanup(self, name, sub): return "" def c_support_code(self): if self.dtype.startswith('complex'): cplx_types = ['theano_complex64', 'theano_complex128'] real_types = ['npy_int8', 'npy_int16', 'npy_int32', 'npy_int64', 'npy_float32', 'npy_float64'] # If the 'int' C type is not exactly the same as an existing # 'npy_intX', some C code may not compile, e.g. when assigning # the value 0 (cast to 'int' in C) to a theano_complex64. 
if (np.dtype('intc').num not in [np.dtype(d[4:]).num for d in real_types]): # In that case we add the 'int' type to the real types. real_types.append('int') template = """ struct theano_complex%(nbits)s : public npy_complex%(nbits)s { typedef theano_complex%(nbits)s complex_type; typedef npy_float%(half_nbits)s scalar_type; complex_type operator +(const complex_type &y) const { complex_type ret; ret.real = this->real + y.real; ret.imag = this->imag + y.imag; return ret; } complex_type operator -() const { complex_type ret; ret.real = -this->real; ret.imag = -this->imag; return ret; } bool operator ==(const complex_type &y) const { return (this->real == y.real) && (this->imag == y.imag); } bool operator ==(const scalar_type &y) const { return (this->real == y) && (this->imag == 0); } complex_type operator -(const complex_type &y) const { complex_type ret; ret.real = this->real - y.real; ret.imag = this->imag - y.imag; return ret; } complex_type operator *(const complex_type &y) const { complex_type ret; ret.real = this->real * y.real - this->imag * y.imag; ret.imag = this->real * y.imag + this->imag * y.real; return ret; } complex_type operator /(const complex_type &y) const { complex_type ret; scalar_type y_norm_square = y.real * y.real + y.imag * y.imag; ret.real = (this->real * y.real + this->imag * y.imag) / y_norm_square; ret.imag = (this->imag * y.real - this->real * y.imag) / y_norm_square; return ret; } template complex_type& operator =(const T& y); theano_complex%(nbits)s() {} template theano_complex%(nbits)s(const T& y) { *this = y; } template theano_complex%(nbits)s(const TR& r, const TI& i) { this->real=r; this->imag=i; } }; """ def operator_eq_real(mytype, othertype): return ''' template <> %(mytype)s & %(mytype)s::operator=<%(othertype)s>(const %(othertype)s & y) { this->real=y; this->imag=0; return *this; } ''' % dict(mytype=mytype, othertype=othertype) def operator_eq_cplx(mytype, othertype): return ''' template <> %(mytype)s & %(mytype)s::operator=<%(othertype)s>(const %(othertype)s & y) { this->real=y.real; this->imag=y.imag; return *this; } ''' % dict(mytype=mytype, othertype=othertype) operator_eq = (''.join(operator_eq_real(ctype, rtype) for ctype in cplx_types for rtype in real_types) + ''.join(operator_eq_cplx(ctype1, ctype2) for ctype1 in cplx_types for ctype2 in cplx_types)) # We are not using C++ generic templating here, because this would # generate two different functions for adding a complex64 and a # complex128, one returning a complex64, the other a complex128, # and the compiler complains it is ambiguous. # Instead, we generate code for known and safe types only. 
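        # Note: the helpers below emit one non-template overload of +, - and *
        # for every (complex type, real type) pair, in both argument orders,
        # so mixed complex/real arithmetic still compiles without ambiguity.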
def operator_plus_real(mytype, othertype): return ''' const %(mytype)s operator+(const %(mytype)s &x, const %(othertype)s &y) { return %(mytype)s(x.real+y, x.imag); } const %(mytype)s operator+(const %(othertype)s &y, const %(mytype)s &x) { return %(mytype)s(x.real+y, x.imag); } ''' % dict(mytype=mytype, othertype=othertype) operator_plus = ''.join(operator_plus_real(ctype, rtype) for ctype in cplx_types for rtype in real_types) def operator_minus_real(mytype, othertype): return ''' const %(mytype)s operator-(const %(mytype)s &x, const %(othertype)s &y) { return %(mytype)s(x.real-y, x.imag); } const %(mytype)s operator-(const %(othertype)s &y, const %(mytype)s &x) { return %(mytype)s(y-x.real, -x.imag); } ''' % dict(mytype=mytype, othertype=othertype) operator_minus = ''.join(operator_minus_real(ctype, rtype) for ctype in cplx_types for rtype in real_types) def operator_mul_real(mytype, othertype): return ''' const %(mytype)s operator*(const %(mytype)s &x, const %(othertype)s &y) { return %(mytype)s(x.real*y, x.imag*y); } const %(mytype)s operator*(const %(othertype)s &y, const %(mytype)s &x) { return %(mytype)s(x.real*y, x.imag*y); } ''' % dict(mytype=mytype, othertype=othertype) operator_mul = ''.join(operator_mul_real(ctype, rtype) for ctype in cplx_types for rtype in real_types) return (template % dict(nbits=64, half_nbits=32) + template % dict(nbits=128, half_nbits=64) + operator_eq + operator_plus + operator_minus + operator_mul) else: return "" def c_init_code(self): return ["import_array();"] def c_code_cache_version(self): return (13, np.__version__) def get_shape_info(self, obj): return obj.itemsize def get_size(self, shape_info): return shape_info # Register C code for ViewOp on Scalars. theano.compile.register_view_op_c_code( Scalar, """ %(oname)s = %(iname)s; """, 1) bool = get_scalar_type('bool') int8 = get_scalar_type('int8') int16 = get_scalar_type('int16') int32 = get_scalar_type('int32') int64 = get_scalar_type('int64') uint8 = get_scalar_type('uint8') uint16 = get_scalar_type('uint16') uint32 = get_scalar_type('uint32') uint64 = get_scalar_type('uint64') float16 = get_scalar_type('float16') float32 = get_scalar_type('float32') float64 = get_scalar_type('float64') complex64 = get_scalar_type('complex64') complex128 = get_scalar_type('complex128') int_types = int8, int16, int32, int64 uint_types = uint8, uint16, uint32, uint64 float_types = float16, float32, float64 complex_types = complex64, complex128 integer_types = int_types + uint_types discrete_types = (bool,) + integer_types continuous_types = float_types + complex_types all_types = discrete_types + continuous_types class _scalar_py_operators: # So that we can simplify checking code when we have a mixture of Scalar # variables and Tensor variables ndim = 0 dtype = property(lambda self: self.type.dtype) """The dtype of this scalar.""" # UNARY def __abs__(self): return abs_(self) def __neg__(self): return neg(self) # CASTS # def __int__(self): return AsInt(self).out # def __float__(self): return AsDouble(self).out # def __complex__(self): return AsComplex(self).out # BITWISE def __invert__(self): return invert(self) def __and__(self, other): return and_(self, other) def __or__(self, other): return or_(self, other) def __xor__(self, other): return xor(self, other) def __rand__(self, other): return and_(other, self) def __ror__(self, other): return or_(other, self) def __rxor__(self, other): return xor(other, self) # COMPARISONS def __lt__(self, other): return lt(self, other) def __le__(self, other): return le(self, 
other) def __gt__(self, other): return gt(self, other) def __ge__(self, other): return ge(self, other) # ARITHMETIC - NORMAL def __add__(self, other): return add(self, other) def __sub__(self, other): return sub(self, other) def __mul__(self, other): return mul(self, other) def __truediv__(self, other): return div_proxy(self, other) def __div__(self, other): return div_proxy(self, other) def __floordiv__(self, other): return int_div(self, other) def __mod__(self, other): return mod_check(self, other) def __pow__(self, other): return pow(self, other) # ARITHMETIC - RIGHT-OPERAND def __radd__(self, other): return add(other, self) def __rsub__(self, other): return sub(other, self) def __rmul__(self, other): return mul(other, self) def __rdiv__(self, other): return div_proxy(other, self) def __rmod__(self, other): return mod(other, self) def __rpow__(self, other): return pow(other, self) def zeros_like(self, dtype=None): # The second is needed for Elemwise ops to work right if dtype is None: dtype = str(self.type.dtype) return second(self, ScalarConstant(get_scalar_type(dtype), 0)) def ones_like(self, dtype=None): # The second is needed for Elemwise ops to work right if dtype is None: dtype = str(self.type.dtype) return second(self, ScalarConstant(get_scalar_type(dtype), 1)) def astype(self, dtype): return cast(self, dtype) class ScalarVariable(_scalar_py_operators, Variable): pass class ScalarConstant(_scalar_py_operators, Constant): pass # Register ScalarConstant as the type of Constant corresponding to Scalar Scalar.Constant = ScalarConstant # Easy constructors def _multi(*fns): def f2(f, names): if len(names) == 1: return f(names) else: return [f(name) for name in names] if len(fns) == 1: return partial(f2, fns[0]) else: return [partial(f2, f) for f in fns] ints = _multi(int64) floats = _multi(float64) complexs = _multi(complex128) complexs64 = _multi(complex64) complexs128 = _multi(complex128) def upcast_out(*types): dtype = Scalar.upcast(*types) return get_scalar_type(dtype), def upcast_out_nobool(*types): type = upcast_out(*types) if type[0] == bool: raise TypeError("bool output not supported") return type def upcast_out_min8(*types): type = upcast_out(*types) if type[0] == bool: return int8, return type def upgrade_to_float(*types): """ Upgrade any int types to float32 or float64 to avoid losing precision. """ conv = {bool: float32, int8: float32, int16: float32, int32: float64, int64: float64, uint8: float32, uint16: float32, uint32: float64, uint64: float64} return get_scalar_type(Scalar.upcast(*[conv.get(type, type) for type in types])), def upgrade_to_float64(*types): """ Upgrade any int and float32 to float64 to do as SciPy. 
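    For instance, both int8 and float32 arguments map to a float64 output
    here, mirroring SciPy, which typically returns float64 results.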
""" return get_scalar_type('float64'), def same_out(type): return type, def same_out_nobool(type): if type == bool: raise TypeError("bool input not supported") return type, def same_out_min8(type): if type == bool: return int8, return type, def upcast_out_no_complex(*types): if any([type in complex_types for type in types]): raise TypeError('complex type are not supported') return get_scalar_type(dtype=Scalar.upcast(*types)), def same_out_float_only(type): if type not in float_types: raise TypeError('only float type are supported') return type, class transfer_type(gof.utils.object2): __props__ = ('transfer',) def __init__(self, *transfer): assert all(type(x) in [int, str] or x is None for x in transfer) self.transfer = transfer def __str__(self): return 'transfer_type{%s}' % self.transfer def __call__(self, *types): upcast = upcast_out(*types) retval = [] for i in self.transfer: if i is None: retval += [upcast] elif isinstance(i, str): retval += [i] else: retval += [types[i]] return retval # return [upcast if i is None else types[i] for i in self.transfer] class specific_out(gof.utils.object2): __props__ = ('spec',) def __init__(self, *spec): self.spec = spec def __call__(self, *types): return self.spec def int_out(*types): return int64, def float_out(*types): return float64, def upgrade_to_float_no_complex(*types): """ Don't accept complex, otherwise call upgrade_to_float(). """ for type in types: if type in complex_types: raise TypeError('complex argument not supported') return upgrade_to_float(*types) def same_out_nocomplex(type): if type in complex_types: raise TypeError('complex argument not supported') return type, def int_out_nocomplex(*types): for type in types: if type in complex_types: raise TypeError('complex argument not supported') return int64, def float_out_nocomplex(*types): for type in types: if type in complex_types: raise TypeError('complex argument not supported') return float64, class unary_out_lookup(gof.utils.object2): """ Get a output_types_preference object by passing a dictionary: unary_out_lookup({int8:int32, float32:complex128}) The result is an op that maps in8 to int32 and float32 to complex128 and other input types lead to a TypeError. """ def __init__(self, type_table): self.tbl = type_table def __call__(self, *types): if len(types) == 1: types = types[0] try: rval = self.tbl[types] except Exception: raise TypeError(types) if isinstance(types, (list, tuple)): return rval else: return [rval] def __eq__(self, other): return type(self) == type(other) and self.tbl == other.tbl def __hash__(self): return hash(type(self)) # ignore hash of table def real_out(type): if type == complex64: return float32, if type == complex128: return float64, return type, class ScalarOp(Op): nin = -1 nout = 1 def __init__(self, output_types_preference=None, name=None): self.name = name if output_types_preference is not None: if not isinstance(output_types_preference, collections.Callable): raise TypeError( "Expected a callable for the 'output_types_preference' argument to %s. 
(got: %s)" % (self.__class__, output_types_preference)) self.output_types_preference = output_types_preference def make_node(self, *inputs): if self.nin >= 0: if len(inputs) != self.nin: raise TypeError("Wrong number of inputs for %s.make_node (got %i(%s), expected %i)" % (self, len(inputs), str(inputs), self.nin)) inputs = [as_scalar(input) for input in inputs] outputs = [t() for t in self.output_types([input.type for input in inputs])] if len(outputs) != self.nout: raise TypeError("Not the right number of outputs produced for %s(%s). Expected %s, got %s." % (self, ", ".join(str(input) for input in inputs), self.nout, len(outputs))) return Apply(self, inputs, outputs) def output_types(self, types): if hasattr(self, 'output_types_preference'): variables = self.output_types_preference(*types) if not isinstance(variables, (list, tuple)) or any(not isinstance(x, Type) for x in variables): raise TypeError( "output_types_preference should return a list or a tuple of types", self.output_types_preference, variables) if len(variables) != self.nout: raise TypeError("Not the right number of outputs types produced for %s(%s) by %s. Expected %s, got %s." % (self, ", ".join(str(type) for type in variables), self.output_types_preference, self.nout, len(variables))) return variables else: raise NotImplementedError( "Cannot calculate the output types for %s" % self) def perform(self, node, inputs, output_storage): if self.nout == 1: output_storage[0][0] = self.impl(*inputs) else: variables = utils.from_return_values(self.impl(*inputs)) assert len(variables) == len(output_storage) for storage, variable in zip(output_storage, variables): storage[0] = variable def impl(self, *inputs): raise utils.MethodNotDefined("impl", type(self), self.__class__.__name__) def grad(self, inputs, output_gradients): raise utils.MethodNotDefined("grad", type(self), self.__class__.__name__) def L_op(self, inputs, outputs, output_gradients): return self.grad(inputs, output_gradients) def __eq__(self, other): test = (type(self) == type(other) and getattr(self, 'output_types_preference', None) == getattr(other, 'output_types_preference', None)) return test def __hash__(self): return hash((type(self), getattr(self, 'output_types_preference', 0))) def __str__(self): if hasattr(self, 'name') and self.name: return self.name else: param = [(k, v) for k, v in self.__dict__.items() if k not in ["name", "_op_use_c_code", "bool", "output_types_preference"]] if param: return "%s{%s}" % (self.__class__.__name__, ", ".join("%s=%s" % (k, v) for k, v in param)) else: return self.__class__.__name__ def c_code_cache_version(self): return (4,) def c_code_contiguous(self, node, name, inp, out, sub): """ This function is called by Elemwise when all inputs and outputs are c_contiguous. This allows to use the SIMD version of this op. The inputs are the same as c_code except that: - inp and out must be the names of the variables associated to the ndarrays in the C code - node must be the elemwise node (this is needed to know the inputs/outputs types) """ raise theano.gof.utils.MethodNotDefined() def supports_c_code(self, inputs, outputs): """Returns True if the current op has functioning C code for the given Elemwise inputs, outputs. 
""" try: tmp_s_input = [] # To keep the same aliasing between inputs mapping = dict() for ii in inputs: if ii in mapping: tmp_s_input.append(mapping[ii]) else: tmp = get_scalar_type(ii.dtype).make_variable() tmp_s_input.append(tmp) mapping[ii] = tmp_s_input[-1] with theano.change_flags(compute_test_value='ignore'): s_op = self(*tmp_s_input, return_list=True) # if the scalar_op don't have a c implementation, # we skip its fusion to allow the fusion of the # other ops. self.c_code(s_op[0].owner, "test_presence_of_c_code", ["x" for x in inputs], ["z" for z in outputs], {"fail": "%(fail)s"}) except (theano.gof.utils.MethodNotDefined, NotImplementedError): return False return True class UnaryScalarOp(ScalarOp): nin = 1 amd_float32 = None amd_float64 = None def c_code_contiguous(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if (not theano.config.lib.amdlibm or # We compare the dtype AND the broadcast flag # as this function do not broadcast node.inputs[0].type != node.outputs[0].type): raise theano.gof.utils.MethodNotDefined() dtype = node.inputs[0].type.dtype_specs()[1] fct_call = self.c_code_contiguous_raw(dtype, 'n', 'x', 'z') return """ { npy_intp n = PyArray_SIZE(%(z)s); %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s); %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s); %(fct_call)s; } """ % locals() def c_code_contiguous_raw(self, dtype, n, i, o): if not config.lib.amdlibm: raise theano.gof.utils.MethodNotDefined() if dtype.startswith('npy_'): dtype = dtype[4:] if dtype == 'float32' and self.amd_float32 is not None: dtype = 'float' fct = self.amd_float32 elif dtype == 'float64' and self.amd_float64 is not None: dtype = 'double' fct = self.amd_float64 else: raise theano.gof.utils.MethodNotDefined() return "%(fct)s(%(n)s, %(i)s, %(o)s)" % locals() class BinaryScalarOp(ScalarOp): # One may define in subclasses the following fields: # - `identity`: for an associative operation, identity corresponds to # the neutral element. For instance, it will be 0 for addition, 1 for # multiplication, True for "and", False for "or". # - `commutative`: whether op(a, b) == op(b, a) # - `associative`: whether op(op(a, b), c) == op(a, op(b, c)) nin = 2 ############### # Comparisons ############### class LogicalComparison(BinaryScalarOp): def __init__(self, *args, **kwargs): BinaryScalarOp.__init__(self, *args, **kwargs) # This is for compat with old pickles. self.bool = True def __eq__(self, other): return (BinaryScalarOp.__eq__(self, other) and getattr(self, 'bool', False) == getattr(other, 'bool', False)) def __hash__(self): # bool should always be True return BinaryScalarOp.__hash__(self) def output_types(self, *input_dtypes): return [bool] if getattr(self, 'bool', False) else [int8] def L_op(self, inputs, outputs, output_gradients): x, y = inputs assert outputs[0].type == bool return [x.zeros_like().astype(theano.config.floatX), y.zeros_like().astype(theano.config.floatX)] def c_code_cache_version(self): super_version = super(LogicalComparison, self).c_code_cache_version() return super_version + (0,) class FixedLogicalComparison(UnaryScalarOp): """ Comparison to a fixed value. 
""" def __init__(self, *args, **kwargs): UnaryScalarOp.__init__(self, *args, **kwargs) # This is for compat with old pickles self.bool = True def __eq__(self, other): return (UnaryScalarOp.__eq__(self, other) and getattr(self, 'bool', False) == getattr(other, 'bool', False)) def __hash__(self): # bool should always be True return UnaryScalarOp.__hash__(self) def output_types(self, *input_dtypes): return [bool] if getattr(self, 'bool', False) else [int8] def L_op(self, inputs, outputs, output_gradients): x, = inputs assert outputs[0].type == bool return [x.zeros_like().astype(theano.config.floatX)] def c_code_cache_version(self): super_version = super(FixedLogicalComparison, self).c_code_cache_version() return super_version + (0,) class LT(LogicalComparison): identity = False commutative = False associative = False nfunc_spec = ('less', 2, 1) def impl(self, x, y): # built-in < don't support complex return np.less(x, y) def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() return "%(z)s = (%(x)s < %(y)s);" % locals() lt = LT() class GT(LogicalComparison): identity = False commutative = False associative = False nfunc_spec = ('greater', 2, 1) def impl(self, x, y): # built-in > don't support complex return np.greater(x, y) def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() return "%(z)s = (%(x)s > %(y)s);" % locals() gt = GT() class LE(LogicalComparison): identity = False commutative = False associative = False nfunc_spec = ('less_equal', 2, 1) def impl(self, x, y): # built-in <= don't support complex return np.less_equal(x, y) def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() return "%(z)s = (%(x)s <= %(y)s);" % locals() le = LE() class GE(LogicalComparison): identity = False commutative = False associative = False nfunc_spec = ('greater_equal', 2, 1) def impl(self, x, y): # built-in >= don't support complex return np.greater_equal(x, y) def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() return "%(z)s = (%(x)s >= %(y)s);" % locals() ge = GE() class EQ(LogicalComparison): identity = False commutative = True associative = False nfunc_spec = ('equal', 2, 1) def impl(self, x, y): return x == y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs return "%(z)s = (%(x)s == %(y)s);" % locals() eq = EQ() class NEQ(LogicalComparison): identity = False commutative = True associative = False nfunc_spec = ('not_equal', 2, 1) def impl(self, x, y): return x != y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() return "%(z)s = (%(x)s != %(y)s);" % locals() neq = NEQ() class IsNan(FixedLogicalComparison): nfunc_spec = ('isnan', 1, 1) def impl(self, x): return np.isnan(x) def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() # Discrete type can never be nan if node.inputs[0].type in discrete_types: return "%(z)s = false;" % locals() # Windows tries to be different and sometimes return -1, but we want # to be consistent with numpy (which returns True), hence the "abs". 
return "%(z)s = abs(isnan(%(x)s));" % locals() def c_code_cache_version(self): scalarop_version = super(IsNan, self).c_code_cache_version() return tuple(scalarop_version) + (3,) isnan = IsNan() class IsInf(FixedLogicalComparison): nfunc_spec = ('isinf', 1, 1) def impl(self, x): return np.isinf(x) def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() # Discrete type can never be inf if node.inputs[0].type in discrete_types: return "%(z)s = false;" % locals() # Note that the C isinf returns -1 for -Inf and +1 for +Inf, while # numpy simply returns True: we mimic numpy's behavior here, thus # the absolute value. return "%(z)s = abs(isinf(%(x)s));" % locals() def c_code_cache_version(self): scalarop_version = super(IsInf, self).c_code_cache_version() return tuple(scalarop_version) + (3,) isinf = IsInf() class InRange(LogicalComparison): nin = 3 def __init__(self, openlow, openhi): self.openlow = openlow self.openhi = openhi def impl(self, x, low, hi): if self.openlow and x <= low: return False elif not self.openlow and x < low: return False if self.openhi and x >= hi: return False elif not self.openhi and x > hi: return False return True def c_code(self, node, name, inputs, outputs, sub): (x, low, hi) = inputs (z,) = outputs cmp1 = '>' if self.openlow else '>=' cmp2 = '<' if self.openhi else '<=' return ("%(z)s = %(x)s %(cmp1)s %(low)s &&" " %(x)s %(cmp2)s %(hi)s;" % locals()) def get_grad(self, elem): if elem.type in complex_types: msg = ("No gradient implemented for complex numbers in " "class scalar.basic.InRange") raise NotImplementedError(msg) elif elem.type in discrete_types: return elem.zeros_like().astype(theano.config.floatX) else: return elem.zeros_like() def L_op(self, inputs, outputs, gout): (x, low, hi) = inputs (gz,) = gout grads = [] for elem in [x, low, hi]: grads.append(self.get_grad(elem)) return grads inopenrange = InRange(True, True) inclosedrange = InRange(False, False) class Switch(ScalarOp): nin = 3 nfunc_spec = ('where', 3, 1) def impl(self, cond, ift, iff): return ift if cond else iff def c_code(self, node, name, inputs, outputs, sub): (cond, ift, iff) = inputs (z,) = outputs return "%(z)s = %(cond)s ? %(ift)s : %(iff)s;" % locals() def L_op(self, inputs, outputs, gout): (cond, ift, iff) = inputs (gz,) = gout first_part = switch(cond, gz, 0.) second_part = switch(cond, 0., gz) if (outputs[0].type.dtype in discrete_types): first_part = 0. second_part = 0. # cond does affect the elements of the output so it is connected. 
# For the sake of making the gradient convenient we assume that # condition + epsilon always triggers the same branch as condition condition_grad = cond.zeros_like().astype(theano.config.floatX) return (condition_grad, first_part, second_part) def output_types(self, types): (cond_t, ift_t, iff_t) = types return upcast_out(ift_t, iff_t) switch = Switch() #################### # BIT-WISE OPERATORS #################### class UnaryBitOp(UnaryScalarOp): def output_types(self, *input_types): for i in input_types[0]: if i not in discrete_types: raise TypeError('input to a BitOp must have type (u)int8, ' '(u)int16, (u)int32 or (u)int64 or bool not %s' % i) return upcast_out(*input_types[0]) def grad(self, inputs, output_gradients): return [inputs[0].zeros_like().astype(theano.config.floatX)] class BinaryBitOp(BinaryScalarOp): def output_types(self, *input_types): t0, t1 = input_types[0] if t0 == bool and t1 == bool: return [bool] for i in input_types[0]: if i not in integer_types: raise TypeError('input to a BitOp must have type (u)int8, ' '(u)int16, (u)int32 or (u)int64 or ' 'be all bools not %s' % i) return upcast_out(*input_types[0]) def grad(self, inputs, output_gradients): a, b = inputs return [a.zeros_like().astype(theano.config.floatX), b.zeros_like().astype(theano.config.floatX)] class OR(BinaryBitOp): identity = 0 commutative = True associative = True nfunc_spec = ('bitwise_or', 2, 1) def impl(self, x, y): return x | y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs return "%(z)s = (%(x)s | %(y)s);" % locals() or_ = OR() class XOR(BinaryBitOp): identity = 0 commutative = True associative = True nfunc_spec = ('bitwise_xor', 2, 1) def impl(self, x, y): return x ^ y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs return "%(z)s = (%(x)s ^ %(y)s);" % locals() xor = XOR() class AND(BinaryBitOp): identity = -1 commutative = True associative = True nfunc_spec = ('bitwise_and', 2, 1) def impl(self, x, y): return x & y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs return "%(z)s = (%(x)s & %(y)s);" % locals() def c_code_cache_version(self): super_version = super(AND, self).c_code_cache_version() return super_version + (3,) and_ = AND() class Invert(UnaryBitOp): nfunc_spec = ('invert', 1, 1) def impl(self, x): return ~x def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.outputs[0].type == bool: return "%(z)s = (!%(x)s);" % locals() return "%(z)s = (~%(x)s);" % locals() invert = Invert() ############## # Arithmetic ############## class Maximum(BinaryScalarOp): commutative = True associative = True nfunc_spec = ('maximum', 2, 1) def impl(self, *inputs): # The built-in max function don't support complex type return np.maximum(*inputs) def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if any([i.type in complex_types for i in node.inputs]): raise NotImplementedError() # Test for both y>x and x>=y to detect NaN return ('%(z)s = ((%(y)s)>(%(x)s)? (%(y)s): ' '((%(x)s)>=(%(y)s)? (%(x)s): nan("")));' % locals()) def L_op(self, inputs, outputs, gout): (x, y) = inputs (gz,) = gout if gz.type in complex_types: # max is currently defined for complex_types, # but the gradient for complex is not. raise NotImplementedError() if outputs[0].type in discrete_types: return [x.zeros_like().astype(theano.config.floatX), y.zeros_like().astype(theano.config.floatX)] # This form handle the case when both value are the same. 
# In that case, gx will be gz, gy will be 0. e = eq(outputs[0], x) gx = e * gz gy = (constant(1, dtype=gz.dtype) - e) * gz return (gx, gy) maximum = Maximum(upcast_out, name='maximum') class Minimum(BinaryScalarOp): commutative = True associative = True nfunc_spec = ('minimum', 2, 1) def impl(self, *inputs): # The built-in min function don't support complex type return np.minimum(*inputs) def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if any([i.type in complex_types for i in node.inputs]): raise NotImplementedError() return ('%(z)s = ((%(y)s)<(%(x)s)? (%(y)s): ' '((%(x)s)<=(%(y)s)? (%(x)s): nan("")));' % locals()) def L_op(self, inputs, outputs, gout): (x, y) = inputs (gz,) = gout if gz.type in complex_types: # min is currently defined for complex_types, # but the gradient for complex is not. raise NotImplementedError() if outputs[0].type in discrete_types: return [x.zeros_like().astype(theano.config.floatX), y.zeros_like().astype(theano.config.floatX)] # This form handle the case when both value are the same. # In that case, gx will be gz, gy will be 0. e = eq(outputs[0], x) gx = e * gz gy = (constant(1, dtype=gz.dtype) - e) * gz return (gx, gy) minimum = Minimum(upcast_out, name='minimum') class Add(ScalarOp): identity = 0 commutative = True associative = True nfunc_spec = ('add', 2, 1) def impl(self, *inputs): return sum(inputs) def c_code(self, node, name, inputs, outputs, sub): (z,) = outputs op = " + " if node.outputs[0].type == bool: op = " || " if not inputs: return z + " = 0;" else: return z + " = " + op.join(inputs) + ";" def L_op(self, inputs, outputs, gout): (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if (outputs[0].type in discrete_types): assert gz is not None retval = [] for ii, inp in enumerate(inputs): if hasattr(inp, 'zeros_like'): retval.append( inp.zeros_like().astype(theano.config.floatX)) else: retval.append(grad_undefined(self, ii, inp)) else: retval = [] for i in inputs: retval += [gz] return retval add = Add(upcast_out, name='add') class Mul(ScalarOp): identity = 1 commutative = True associative = True nfunc_spec = ('multiply', 2, 1) def impl(self, *inputs): return np.product(inputs) def c_code(self, node, name, inputs, outputs, sub): (z,) = outputs op = " * " if node.outputs[0].type == bool: op = " && " if not inputs: return z + " = 1;" else: return z + " = " + op.join(inputs) + ";" def grad(self, inputs, gout): (gz,) = gout retval = [] # The following 3 lines verify that gz is complex when the # output is complex. The rest of this function make this supposition. 
output_type = self.output_types([i.type for i in inputs])[0] if output_type in complex_types: if gz.type not in complex_types: raise TypeError( 'Mul with output_type ' + str(output_type) + ' expected gz type to be complex, got gz with type ' + str(gz.type)) if output_type in discrete_types: return [ipt.zeros_like().astype(theano.config.floatX) for ipt in inputs] for input in inputs: if gz.type in complex_types: # zr+zi = (xr + xi)(yr + yi) # zr+zi = (xr*yr - xi*yi) + (xr yi + xi yr ) otherprod = mul(*(utils.difference(inputs, [input]))) yr = real(otherprod) yi = imag(otherprod) if input.type in complex_types: retval += [complex(yr * real(gz) + yi * imag(gz), yr * imag(gz) - yi * real(gz))] else: retval += [yr * real(gz) + yi * imag(gz)] else: retval += [mul(*([gz] + utils.difference(inputs, [input])))] return retval mul = Mul(upcast_out, name='mul') class Sub(BinaryScalarOp): nfunc_spec = ('subtract', 2, 1) def impl(self, x, y): return x - y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs return "%(z)s = %(x)s - %(y)s;" % locals() def L_op(self, inputs, outputs, gout): (x, y) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: return [x.zeros_like().astype(theano.config.floatX), y.zeros_like().astype(theano.config.floatX)] first_part = gz second_part = -gz return first_part, second_part sub = Sub(upcast_out_nobool, name='sub') def int_or_true_div(x_discrete, y_discrete): """ Return 'int' or 'true' depending on the type of division used for x / y. Parameters ---------- x_discrete : bool True if `x` is discrete ([unsigned] integer). y_discrete : bool True if `y` is discrete ([unsigned] integer). Returns ------- str 'int' if `x / y` should be an integer division, or `true` if it should be a true division. Raises ------ IntegerDivisionError If both `x_discrete` and `y_discrete` are True and `config.int_division` is set to 'raise'. Notes ----- This function is used by both scalar/basic.py and tensor/basic.py. """ if (x_discrete and y_discrete): if config.int_division == 'raise': raise IntegerDivisionError( "With `config.int_division` set to 'raise', dividing two " "integer types with '/' is forbidden to avoid confusion " "between integer and floating point divisions. Please " "use // for integer division, or if you want a float result " "either cast one of the arguments to a float or directly call " "`x.__truediv__(y)`.") elif config.int_division == 'int': warnings.warn( "Division of two integer types with x / y is deprecated, " "please use x // y for an integer division.", DeprecationWarning, stacklevel=4) return int_div elif config.int_division == 'floatX': return true_div else: raise NotImplementedError(config.int_division) else: return true_div def div_proxy(x, y): """ Proxy for either true_div or int_div, depending on types of x, y. """ f = int_or_true_div(as_scalar(x).type in discrete_types, as_scalar(y).type in discrete_types) return f(x, y) class TrueDiv(BinaryScalarOp): nfunc_spec = ('true_divide', 2, 1) def output_types(self, types): if all(t in discrete_types for t in types): return [get_scalar_type(config.floatX)] else: return super(TrueDiv, self).output_types(types) def impl(self, x, y): x = np.asarray(x) y = np.asarray(y) if all(a.dtype in discrete_types for a in (x, y)): return np.sctypeDict[config.floatX](float(x) / y) else: return x / y def c_code(self, node, name, inputs, outputs, sub): # we generate good c code only when both are complex! 
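        # In other words: mixing exactly one complex input with a real one is
        # rejected below, and when both inputs are discrete the numerator is
        # cast to double so that C performs a true division, matching numpy's
        # true_divide.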
(x, y) = inputs (z,) = outputs if sum([node.inputs[0].type in complex_types, node.inputs[1].type in complex_types]) == 1: raise NotImplementedError('type not supported', type) if (node.inputs[0].type in discrete_types and node.inputs[1].type in discrete_types): return "%(z)s = ((double)%(x)s) / %(y)s;" % locals() return "%(z)s = %(x)s / %(y)s;" % locals() def grad(self, inputs, gout): (x, y) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() # If the output of this op is discrete, then it # it is locally flat everywhere, so the gradient # through it is 0. # This is different from it not being connected # to the output; x/y is still a function of x # and y; it's just a step function. if all(a.dtype in discrete_types for a in (x, y)): return [x.zeros_like(), y.zeros_like()] first_part = gz / y if y.type in complex_types: raise NotImplementedError() second_part = -(gz * x) / (y * y) return first_part, second_part true_div = TrueDiv(upcast_out, name='true_div') class IntDiv(BinaryScalarOp): nfunc_spec = ('floor_divide', 2, 1) complex_error = ComplexError( "Theano does not support integer division (//) on " "complex numbers, since numpy deprecated it.") def impl(self, x, y): return x // y def c_support_code(self): # We use a macro as python use % as a special string character, # and the output of c_code may be run through another level # of string formatting. return "#define THEANO_MACRO_MOD(x,y) (x % y)" def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs fail = sub['fail'] t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]]) if t in imap(str, discrete_types): x_div_y_pp = '(%(x)s / %(y)s)' % locals() x_div_y_mp = '((-%(x)s) / %(y)s)' % locals() x_mod_y_mp = 'THEANO_MACRO_MOD((-%(x)s), %(y)s)' % locals() x_div_y_pm = '(%(x)s / (-%(y)s))' % locals() x_mod_y_pm = 'THEANO_MACRO_MOD(%(x)s, (-%(y)s))' % locals() x_div_y_mm = '((-%(x)s) / (-%(y)s))' % locals() # If we are in a gpuarray kernel, %(fail)s exits the kernel, # and we do not have any error report, and we cannot set # Python error messages either, so for now we just call the # cuda function, which return a binary pattern of all 1s. div_zero = dedent(''' #ifdef KERNEL %(z)s = %(x_div_y_pp)s; #else PyErr_SetString(PyExc_ZeroDivisionError, "integer division by zero"); %(fail)s #endif ''') % locals() elif t in imap(str, float_types): # We need to call different functions of math.h # depending on the type if t == 'float32': floor = 'floorf' fmod = 'fmodf' elif t == 'float64': floor = 'floor' fmod = 'fmod' else: raise NotImplementedError('type not supported', t) x_div_y_pp = '%(floor)s(%(x)s / %(y)s)' % locals() x_div_y_mp = '%(floor)s((-%(x)s) / %(y)s)' % locals() x_mod_y_mp = '%(fmod)s((-%(x)s), %(y)s)' % locals() x_div_y_pm = '%(floor)s(%(x)s / (-%(y)s))' % locals() x_mod_y_pm = '%(fmod)s(%(x)s, (-%(y)s))' % locals() x_div_y_mm = '%(floor)s((-%(x)s) / (-%(y)s))' % locals() div_zero = '%(z)s = %(x_div_y_pp)s;' % locals() elif t in complex_types: raise self.complex_error else: raise NotImplementedError('type not supported', t) return dedent(""" if (%(y)s == 0) { %(div_zero)s; } else if (%(y)s < 0) { if (%(x)s < 0) { %(z)s = %(x_div_y_mm)s; } else { %(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1); } } else { if (%(x)s < 0) { %(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 
0 : 1); } else { %(z)s = %(x_div_y_pp)s; } } """) % locals() def c_code_cache_version(self): return (6,) def grad(self, inputs, g_output): return [inp.zeros_like(dtype=theano.config.floatX) for inp in inputs] int_div = IntDiv(upcast_out, name='int_div') floor_div = int_div def mod_check(x, y): if (as_scalar(x).type in complex_types or as_scalar(y).type in complex_types): # Currently forbidden. raise Mod.complex_error else: return mod(x, y) class Mod(BinaryScalarOp): nfunc_spec = ('mod', 2, 1) complex_error = ComplexError( "Theano does not support the mod operator (%) on " "complex numbers, since numpy deprecated it.") def impl(self, x, y): if isinstance(x, np.complex) or isinstance(y, np.complex): raise self.complex_error return x % y def c_code_cache_version(self): return (9,) def c_support_code(self): # We use a macro as python use % as a special string character, # and the output of c_code may be run through another level # of string formatting. return "#define THEANO_MACRO_MOD(x, y) (x % y)" def c_code(self, node, name, inputs, outputs, sub): """ We want the result to have the same sign as Python, not the other implementation of mod. """ (x, y) = inputs (z,) = outputs fail = sub['fail'] t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]]) if (str(t) in imap(str, discrete_types) or t in ['uint8', 'int8', 'uint16', 'int16'] or t in ['uint32', 'int32', 'uint64', 'int64'] or t in discrete_types): # The above or's should not be needed anymore. However, for now we # keep them out of safety, and verify they are useless with an # assert. assert str(t) in imap(str, discrete_types) x_mod_y = "THEANO_MACRO_MOD(%(x)s, %(y)s)" % locals() x_mod_ymm = "THEANO_MACRO_MOD(-%(x)s, -%(y)s)" % locals() x_mod_ypm = "THEANO_MACRO_MOD(%(x)s, -%(y)s)" % locals() x_mod_ymp = "THEANO_MACRO_MOD(-%(x)s, %(y)s)" % locals() # If we are in a gpuarray kernel, %(fail)s exits the kernel, # and we do not have any error report, and we cannot set # Python error messages either, so for now we just call the # cuda function, returning a binary pattern depending on dtype mod_zero = dedent(''' #ifdef KERNEL %(z)s = %(x_mod_y)s; #else PyErr_SetString(PyExc_ZeroDivisionError, "integer modulo by zero"); %(fail)s #endif ''') % locals() elif (str(t) in imap(str, float_types) or t in ['float32', 'float64'] or t in float_types): # The above or's should not be needed anymore. However, for now we # keep them out of safety, and verify they are useless with an # assert. assert str(t) in imap(str, float_types) x_mod_y = "fmod(%(x)s, %(y)s)" % locals() x_mod_ymm = "fmod(-%(x)s, -%(y)s)" % locals() x_mod_ypm = "fmod(%(x)s, -%(y)s)" % locals() x_mod_ymp = "fmod(-%(x)s, %(y)s)" % locals() mod_zero = "%(z)s = %(x_mod_y)s;" % locals() elif str(t) in imap(str, complex_types): raise self.complex_error else: raise NotImplementedError('type not supported', t) return dedent(""" if (%(y)s == 0) { %(mod_zero)s; } else if (%(y)s < 0){ if (%(x)s < 0){ %(z)s = -(%(x_mod_ymm)s); } else { %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0); } } else { if (%(x)s < 0){ %(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? 
%(y)s : 0); } else { %(z)s = %(x_mod_y)s; } } """) % locals() def L_op(self, inputs, outputs, gout): (x, y) = inputs (gz,) = gout if outputs[0].type.dtype in discrete_types: # The gradient does not flow in if the output is discrete return [x.zeros_like(dtype=theano.config.floatX), y.zeros_like(dtype=theano.config.floatX)] return [gz, -(x // y) * gz] mod = Mod(upcast_out, name='mod') class Pow(BinaryScalarOp): nfunc_spec = ('power', 2, 1) def impl(self, x, y): return x ** y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if (node.inputs[0].type in complex_types or node.inputs[1].type in complex_types): raise NotImplementedError('type not supported', type) return "%(z)s = pow(%(x)s, %(y)s);" % locals() def L_op(self, inputs, outputs, gout): (x, y) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: return [x.zeros_like().astype(theano.config.floatX), y.zeros_like().astype(theano.config.floatX)] first_part = gz * y * x ** (y - 1) second_part = gz * log(x) * x ** y second_part = switch(eq(x, 0), 0, second_part) return (first_part, second_part) def c_code_contiguous(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs if not theano.config.lib.amdlibm: raise theano.gof.utils.MethodNotDefined() # We compare the dtype AND the broadcast flag # as this function do not broadcast if (node.inputs[0].type == node.outputs[0].type and node.inputs[1].type == node.outputs[0].type and # amdlibm 3.0 do not have a float64 version of this SIMD function node.inputs[0].dtype == 'float32' and node.inputs[1].dtype == 'float32'): dtype = 'float' fct = "amd_vrsa_powf" return """ npy_intp n = PyArray_SIZE(%(z)s); %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s); %(dtype)s * y = (%(dtype)s*) PyArray_DATA(%(y)s); %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s); %(fct)s(n, x, y, z); """ % locals() # We compare the dtype and check we broadcast a scalar elif (node.inputs[0].type == node.outputs[0].type and node.inputs[1].dtype == node.outputs[0].dtype and all(node.inputs[1].broadcastable) and # amdlibm 3.0 do not have a float64 version of this SIMD function node.inputs[0].dtype == 'float32' and node.inputs[1].dtype == 'float32'): dtype = 'float' fct = "amd_vrsa_powxf" return """ npy_intp n = PyArray_SIZE(%(z)s); %(dtype)s * x = (%(dtype)s*) PyArray_DATA(%(x)s); %(dtype)s * y = (%(dtype)s*) PyArray_DATA(%(y)s); %(dtype)s * z = (%(dtype)s*) PyArray_DATA(%(z)s); %(fct)s(n, x, *y, z); """ % locals() raise theano.gof.utils.MethodNotDefined() pow = Pow(upcast_out_min8, name='pow') class Clip(ScalarOp): nin = 3 # The numpy.clip don't work correctly when the min is bigger then the max, # So we do not use nfunc_spec = ('clip', 3, 1) def impl(self, x, min, max): if x < min: return min elif x > max: return max else: return x def c_code(self, node, name, inputs, outputs, sub): (x, min, max) = inputs (z,) = outputs return "%(z)s = %(x)s < %(min)s ? %(min)s : %(x)s > %(max)s ? 
%(max)s : %(x)s;" % locals() def L_op(self, inputs, outputs, gout): (x, mn, mx) = inputs (gz,) = gout assert gz.type not in complex_types gx = ((x >= mn) & (x <= mx)) * gz gmn = (x < mn) * gz gmx = (x > mx) * gz def handle_int(v): if outputs[0].type in int_types: return v.zeros_like().astype(config.floatX) return v return list(map(handle_int, [gx, gmn, gmx])) # Don't allow complex even if numpy do # As there is no mathematical reason for this function on complex clip = Clip(upcast_out_no_complex, name='clip') class Second(BinaryScalarOp): def impl(self, x, y): return y def c_code(self, node, name, inputs, outputs, sub): (x, y) = inputs (z,) = outputs return "%(z)s = %(y)s;" % locals() def connection_pattern(self, node): # x is never connected because its elements are never used # y is connected because its elements are copied over return [[False], [True]] def grad(self, inputs, gout): (x, y) = inputs (gz,) = gout if y.type in continuous_types: # x is disconnected because the elements of x are not used return DisconnectedType()(), gz else: # when y is discrete, we assume the function can be extended # to deal with real-valued inputs by rounding them to the # nearest integer. f(x+eps) thus equals f(x) so the gradient # is zero, not disconnected or undefined return DisconnectedType()(), y.zeros_like() second = Second(transfer_type(1), name='second') class Identity(UnaryScalarOp): def impl(self, input): return input def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs return "%(z)s = %(x)s;" % locals() def grad(self, inputs, gout): (x,) = inputs (gz,) = gout if x.type in continuous_types: return gz, else: return x.zeros_like(dtype=theano.config.floatX), identity = Identity(same_out, name='identity') # CASTING OPERATIONS class Cast(UnaryScalarOp): def __init__(self, o_type, name=None): if not isinstance(o_type, Scalar): raise TypeError(o_type) super(Cast, self).__init__(specific_out(o_type), name=name) self.o_type = o_type self.ctor = getattr(np, o_type.dtype) def __str__(self): return '%s{%s}' % (self.__class__.__name__, self.o_type.dtype) def clone_float32(self): if self.o_type == float16: return convert_to_float32 return self def make_new_inplace(self, output_types_preference=None, name=None): """ This op.__init__ fct don't have the same parameter as other scalar op. This breaks the insert_inplace_optimizer optimization. This function is a fix to patch this, by ignoring the output_types_preference passed by the optimization, and replacing it by the current output type. This should only be triggered when both input and output have the same dtype anyway. """ return self.__class__(self.o_type, name) def impl(self, input): return self.ctor(input) def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.outputs[0].type == bool: return "%s = (%s) ? 
1 : 0;" % (z, x) return "%s = (%s)%s;" % (z, node.outputs[0].type.dtype_specs()[1], x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout if self.o_type in continuous_types: return [gz] else: return [x.zeros_like().astype(theano.config.floatX)] def c_code_cache_version(self): s = super(Cast, self).c_code_cache_version() if s: return (4,) + s else: return s convert_to_bool = Cast(bool, name='convert_to_bool') convert_to_int8 = Cast(int8, name='convert_to_int8') convert_to_int16 = Cast(int16, name='convert_to_int16') convert_to_int32 = Cast(int32, name='convert_to_int32') convert_to_int64 = Cast(int64, name='convert_to_int64') convert_to_uint8 = Cast(uint8, name='convert_to_uint8') convert_to_uint16 = Cast(uint16, name='convert_to_uint16') convert_to_uint32 = Cast(uint32, name='convert_to_uint32') convert_to_uint64 = Cast(uint64, name='convert_to_uint64') convert_to_float16 = Cast(float16, name='convert_to_float16') convert_to_float32 = Cast(float32, name='convert_to_float32') convert_to_float64 = Cast(float64, name='convert_to_float64') convert_to_complex64 = Cast(complex64, name='convert_to_complex64') convert_to_complex128 = Cast(complex128, name='convert_to_complex128') _cast_mapping = { 'bool': convert_to_bool, 'int8': convert_to_int8, 'int16': convert_to_int16, 'int32': convert_to_int32, 'int64': convert_to_int64, 'uint8': convert_to_uint8, 'uint16': convert_to_uint16, 'uint32': convert_to_uint32, 'uint64': convert_to_uint64, 'float16': convert_to_float16, 'float32': convert_to_float32, 'float64': convert_to_float64, 'complex64': convert_to_complex64, 'complex128': convert_to_complex128} def cast(x, dtype): """ Symbolically cast `x` to a Scalar of given `dtype`. """ if dtype == 'floatX': dtype = config.floatX _x = as_scalar(x) if _x.type.dtype == dtype: return _x if _x.type.dtype.startswith('complex') and not dtype.startswith('complex'): raise TypeError('Casting from complex to real is ambiguous: consider' ' real(), imag(), angle() or abs()') return _cast_mapping[dtype](_x) class Abs(UnaryScalarOp): nfunc_spec = ('abs', 1, 1) def make_node(self, x): inputs = [as_scalar(input) for input in [x]] if inputs[0].type == complex64: outputs = [float32()] elif inputs[0].type == complex128: outputs = [float64()] else: outputs = [t() for t in self.output_types( [input.type for input in inputs])] return Apply(self, inputs, outputs) def impl(self, x): return np.abs(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if (outputs[0].type in discrete_types): if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] if x.type in float_types: return gz * sgn(x), return gz * x / abs(x), # formula works for complex and real def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs type = node.inputs[0].type if type in int_types: return "%(z)s = abs(%(x)s);" % locals() if type in float_types: return "%(z)s = fabs(%(x)s);" % locals() if type in complex_types: return "%(z)s = sqrt(%(x)s.real*%(x)s.real + %(x)s.imag*%(x)s.imag);" % locals() if node.outputs[0].type == bool: return "%(z)s = (%(x)s) ? 1 : 0;" % locals() if type in uint_types: # uint are always already absolute value. 
return "%(z)s = %(x)s;" % locals() raise NotImplementedError('type not supported', type) abs_ = Abs(same_out) class Sgn(UnaryScalarOp): nfunc_spec = ('sign', 1, 1) @staticmethod def output_types_preference(x): if x == bool: raise TypeError(x) return same_out_nocomplex(x) def impl(self, x): # casting to output type is handled by filter return np.sign(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout rval = x.zeros_like() if rval.type.dtype in discrete_types: rval = rval.astype(theano.config.floatX) return [rval] def c_code(self, node, name, inputs, outputs, sub): # casting is done by compiler # TODO: use copysign (x,) = inputs (z,) = outputs type = node.inputs[0].type if type in float_types: return '%(z)s = (%(x)s > 0) ? 1. : ((%(x)s < 0) ? -1. : (isnan(%(x)s) ? NAN : 0.));' % locals() if type in int_types: return "%(z)s = (%(x)s >= 0) ? (%(x)s == 0) ? 0 : 1 : -1;" % locals() raise ComplexError('complex has no sgn') def c_code_cache_version(self): s = super(Sgn, self).c_code_cache_version() if s: return (4,) + s else: # if parent is unversioned, we are too return s sgn = Sgn(name='sgn') class Ceil(UnaryScalarOp): nfunc_spec = ('ceil', 1, 1) def impl(self, x): return np.ceil(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout rval = x.zeros_like() if rval.type.dtype in discrete_types: rval = rval.astype(theano.config.floatX) return [rval] def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = ceil((%(cast)s)%(x)s);" % locals() ceil = Ceil(upgrade_to_float_no_complex, name='ceil') class Floor(UnaryScalarOp): nfunc_spec = ('floor', 1, 1) def impl(self, x): return np.floor(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout rval = x.zeros_like() if rval.type.dtype in discrete_types: rval = rval.astype(theano.config.floatX) return [rval] def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = floor((%(cast)s)%(x)s);" % locals() floor = Floor(upgrade_to_float_no_complex, name='floor') class Trunc(UnaryScalarOp): nfunc_spec = ('trunc', 1, 1) def impl(self, x): return np.trunc(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout return [x.zeros_like().astype(theano.config.floatX)] def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs return "%(z)s = %(x)s >= 0? floor(%(x)s): -floor(-%(x)s);" % locals() trunc = Trunc(upgrade_to_float_no_complex, name='trunc') class RoundHalfToEven(UnaryScalarOp): """ This function implement the same rounding than numpy: Round half to even. c/c++ round fct IS DIFFERENT! See http://en.wikipedia.org/wiki/Rounding for more details. """ nfunc_spec = ('around', 1, 1) def impl(self, x): return np.round(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout rval = x.zeros_like() if rval.type.dtype in discrete_types: rval = rval.astype(theano.config.floatX) return [rval] def c_code_cache_version(self): return (1,) def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs typ = node.outputs[0].type.dtype if typ not in ['float32', 'float64']: raise NotImplementedError("The output should be float32 or float64") if typ == 'float32': ctype = 'float' floor_function = 'floorf' else: ctype = 'double' floor_function = 'floor' return """ /* Code inspired from NumPy npy_rint implementation. 
*/ { %(ctype)s y, r; y = %(floor_function)s(%(x)s); r = %(x)s - y; if(r > 0.5) { y += 1; } else if(r == 0.5) { r = y - 2.0*%(floor_function)s(0.5*y); /* If y is even, then r == 0 If y is odd, then r == 1 So we can just add r to y, so that y will be incremented only if he's odd. */ y += (int)r; } %(z)s = y; } """ % locals() round_half_to_even = RoundHalfToEven(same_out_float_only) def round_half_away_from_zero_(a): if a > 0: return np.floor(a + 0.5) else: return np.ceil(a - 0.5) round_half_away_from_zero_vec64 = np.vectorize( round_half_away_from_zero_, doc='round_half_away_from_zero_vec64') round_half_away_from_zero_vec32 = np.vectorize( round_half_away_from_zero_, doc='round_half_away_from_zero_vec32', otypes=['float32']) def round_half_away_from_zero_vec(a): if getattr(a, 'dtype', None) == np.float32: return round_half_away_from_zero_vec32(a) return round_half_away_from_zero_vec64(a) class RoundHalfAwayFromZero(UnaryScalarOp): """ Implement the same rounding algo as c round() fct. numpy.round fct IS DIFFERENT! See http://en.wikipedia.org/wiki/Rounding for more details. """ def impl(self, x): return round_half_away_from_zero_vec(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout rval = x.zeros_like() if rval.type.dtype in discrete_types: rval = rval.astype(theano.config.floatX) return [rval] def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.outputs[0].type.dtype in ['float32', 'float64']: return "%(z)s = round(%(x)s);" % locals() else: raise NotImplementedError("The output should be float32 or float64") round_half_away_from_zero = RoundHalfAwayFromZero(same_out_float_only) class Neg(UnaryScalarOp): # We can use numpy.negative here, because even if it gives unexpected # results on Boolean arrays, it will be passed other dtypes as Theano # does not have a Boolean type for tensors. nfunc_spec = ('negative', 1, 1) def impl(self, x): return -x def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return -gz, def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs return "%(z)s = -%(x)s;" % locals() neg = Neg(same_out_nobool, name='neg') pprint.assign(add, printing.OperatorPrinter('+', -2, 'either')) pprint.assign(mul, printing.OperatorPrinter('*', -1, 'either')) pprint.assign(sub, printing.OperatorPrinter('-', -2, 'left')) pprint.assign(neg, printing.OperatorPrinter('-', 0, 'either')) pprint.assign(true_div, printing.OperatorPrinter('/', -1, 'left')) pprint.assign(int_div, printing.OperatorPrinter('//', -1, 'left')) pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right')) pprint.assign(mod, printing.OperatorPrinter('%', -1, 'left')) class Inv(UnaryScalarOp): """ Multiplicative inverse. Also called reciprocal. """ def impl(self, x): return np.float32(1.0) / x def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return -gz / (x * x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError() return "%(z)s = 1.0 / %(x)s;" % locals() inv = Inv(upgrade_to_float, name='inv') class Log(UnaryScalarOp): """ log base e. 
""" nfunc_spec = ('log', 1, 1) amd_float32 = "amd_vrsa_logf" amd_float64 = "amd_vrda_log" def impl(self, x): # If x is an int8 or uint8, numpy.log will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.log(x, sig='f') return np.log(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / x, def c_code(self, node, name, inputs, outputs, sub): # todo: the version using log2 seems to be very slightly faster # on some machines for some reason, check if it's worth switching # return "%(z)s = log2(%(x)s) * 0.69314718055994529;" % locals() (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = log((%(cast)s)%(x)s);" % locals() log = Log(upgrade_to_float, name='log') class Log2(UnaryScalarOp): """ log base 2. """ nfunc_spec = ('log2', 1, 1) amd_float32 = "amd_vrsa_log2f" amd_float64 = "amd_vrda_log2" def impl(self, x): # If x is an int8 or uint8, numpy.log2 will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.log2(x, sig='f') return np.log2(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / (x * np.asarray(math.log(2.0)).astype(x.dtype)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = log2((%(cast)s)%(x)s);" % locals() log2 = Log2(upgrade_to_float, name='log2') class Log10(UnaryScalarOp): """ log base 10. """ nfunc_spec = ('log10', 1, 1) amd_float32 = "amd_vrsa_log10f" amd_float64 = "amd_vrda_log10" def impl(self, x): # If x is an int8 or uint8, numpy.log10 will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.log10(x, sig='f') return np.log10(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / (x * np.log(10.0)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = log10((%(cast)s)%(x)s);" % locals() log10 = Log10(upgrade_to_float, name='log10') class Log1p(UnaryScalarOp): """ log(1+x). """ nfunc_spec = ('log1p', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.log1p will compute the result in # half-precision (float16), where we want float32. 
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.log1p(x, sig='f') return np.log1p(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return [gz / (1 + x)] def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = log1p((%(cast)s)%(x)s);" % locals() log1p = Log1p(upgrade_to_float, name='log1p') class Exp(UnaryScalarOp): nfunc_spec = ('exp', 1, 1) amd_float32 = "amd_vrsa_expf" amd_float64 = "amd_vrda_exp" def impl(self, x): # If x is an int8 or uint8, numpy.exp will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.exp(x, sig='f') return np.exp(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * exp(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = exp((%(cast)s)%(x)s);" % locals() exp = Exp(upgrade_to_float, name='exp') class Exp2(UnaryScalarOp): nfunc_spec = ('exp2', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.exp2 will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.exp2(x, sig='f') return np.exp2(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * exp2(x) * log(np.cast[x.type](2)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = exp2((%(cast)s)%(x)s);" % locals() exp2 = Exp2(upgrade_to_float, name='exp2') class Expm1(UnaryScalarOp): nfunc_spec = ('expm1', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.expm1 will compute the result in # half-precision (float16), where we want float32. 
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.expm1(x, sig='f') return np.expm1(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * exp(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = expm1((%(cast)s)%(x)s);" % locals() def c_code_cache_version(self): return (5,) expm1 = Expm1(upgrade_to_float, name='expm1') class Sqr(UnaryScalarOp): nfunc_spec = ('square', 1, 1) def impl(self, x): return x * x def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * x * 2, def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs return "%(z)s = %(x)s * %(x)s;" % locals() sqr = Sqr(same_out, name='sqr') class Sqrt(UnaryScalarOp): nfunc_spec = ('sqrt', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.sqrt will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.sqrt(x, sig='f') return np.sqrt(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return (gz * 0.5) / sqrt(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = sqrt((%(cast)s)%(x)s);" % locals() sqrt = Sqrt(upgrade_to_float, name='sqrt') class Deg2Rad(UnaryScalarOp): nfunc_spec = ('deg2rad', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.deg2rad will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.deg2rad(x, sig='f') return np.deg2rad(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * np.asarray(np.pi / 180, gz.type), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) return "%(z)s = %(x)s * (M_PI / 180.0);" % locals() deg2rad = Deg2Rad(upgrade_to_float, name='deg2rad') class Rad2Deg(UnaryScalarOp): nfunc_spec = ('rad2deg', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.rad2deg will compute the result in # half-precision (float16), where we want float32. 
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.rad2deg(x, sig='f') return np.rad2deg(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * np.asarray(180. / np.pi, gz.type), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) return "%(z)s = %(x)s * (180.0 / M_PI);" % locals() rad2deg = Rad2Deg(upgrade_to_float, name='rad2deg') class Cos(UnaryScalarOp): nfunc_spec = ('cos', 1, 1) amd_float32 = "amd_vrsa_cosf" amd_float64 = "amd_vrda_cos" def impl(self, x): # If x is an int8 or uint8, numpy.cos will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.cos(x, sig='f') return np.cos(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return -gz * sin(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = cos((%(cast)s)%(x)s);" % locals() cos = Cos(upgrade_to_float, name='cos') class ArcCos(UnaryScalarOp): nfunc_spec = ('arccos', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.arccos will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.arccos(x, sig='f') return np.arccos(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return - gz / sqrt(np.cast[x.type](1) - sqr(x)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = acos((%(cast)s)%(x)s);" % locals() arccos = ArcCos(upgrade_to_float, name='arccos') class Sin(UnaryScalarOp): nfunc_spec = ('sin', 1, 1) amd_float32 = "amd_vrsa_sinf" amd_float64 = "amd_vrda_sin" def impl(self, x): # If x is an int8 or uint8, numpy.sin will compute the result in # half-precision (float16), where we want float32. 
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.sin(x, sig='f') return np.sin(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * cos(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = sin((%(cast)s)%(x)s);" % locals() sin = Sin(upgrade_to_float, name='sin') class ArcSin(UnaryScalarOp): nfunc_spec = ('arcsin', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.arcsin will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.arcsin(x, sig='f') return np.arcsin(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / sqrt(np.cast[x.type](1) - sqr(x)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = asin((%(cast)s)%(x)s);" % locals() arcsin = ArcSin(upgrade_to_float, name='arcsin') class Tan(UnaryScalarOp): nfunc_spec = ('tan', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.tan will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.tan(x, sig='f') return np.tan(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / sqr(cos(x)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = tan((%(cast)s)%(x)s);" % locals() tan = Tan(upgrade_to_float, name='tan') class ArcTan(UnaryScalarOp): nfunc_spec = ('arctan', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.arctan will compute the result in # half-precision (float16), where we want float32. 
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.arctan(x, sig='f') return np.arctan(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / (np.cast[x.type](1) + sqr(x)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = atan((%(cast)s)%(x)s);" % locals() arctan = ArcTan(upgrade_to_float, name='arctan') class ArcTan2(BinaryScalarOp): nfunc_spec = ('arctan2', 2, 1) def impl(self, y, x): # If x and y are int8 or uint8, numpy.arctan2 will compute the result # in half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): y_dtype = str(getattr(y, 'dtype', '')) if y_dtype in ('int8', 'uint8'): return np.arctan2(y, x, sig='f') return np.arctan2(y, x) def L_op(self, inputs, outputs, gout): (y, x) = inputs (gz,) = gout if gz.type in complex_types: raise NotImplementedError() else: if outputs[0].type in discrete_types: if x.type in discrete_types: gx = x.zeros_like(dtype=theano.config.floatX) else: gx = x.zeros_like() if y.type in discrete_types: gy = y.zeros_like(dtype=theano.config.floatX) else: gy = y.zeros_like() return [gx, gy] # If the output is float, the gradient should flow, # even if the inputs are ints return [gz * x / (sqr(x) + sqr(y)), gz * neg(y) / (sqr(x) + sqr(y))] def c_code(self, node, name, inputs, outputs, sub): (y, x) = inputs (z,) = outputs if (node.inputs[0].type in complex_types or node.inputs[1].type in complex_types): raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = atan2((%(cast)s)%(y)s, (%(cast)s)%(x)s);" % locals() arctan2 = ArcTan2(upgrade_to_float, name='arctan2') class Cosh(UnaryScalarOp): """ cosh(x) = (exp(x) + exp(-x)) / 2. """ nfunc_spec = ('cosh', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.cosh will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.cosh(x, sig='f') return np.cosh(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * sinh(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = cosh((%(cast)s)%(x)s);" % locals() cosh = Cosh(upgrade_to_float, name='cosh') class ArcCosh(UnaryScalarOp): nfunc_spec = ('arccosh', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.arccosh will compute the result in # half-precision (float16), where we want float32.
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.arccosh(x, sig='f') return np.arccosh(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / sqrt(sqr(x) - np.cast[x.type](1)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = acosh((%(cast)s)%(x)s);" % locals() arccosh = ArcCosh(upgrade_to_float, name='arccosh') class Sinh(UnaryScalarOp): """ sinh(x) = (exp(x) - exp(-x)) / 2. """ nfunc_spec = ('sinh', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.sinh will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.sinh(x, sig='f') return np.sinh(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * cosh(x), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = sinh((%(cast)s)%(x)s);" % locals() sinh = Sinh(upgrade_to_float, name='sinh') class ArcSinh(UnaryScalarOp): nfunc_spec = ('arcsinh', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.arcsinh will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.arcsinh(x, sig='f') return np.arcsinh(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / sqrt(sqr(x) + np.cast[x.type](1)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = asinh((%(cast)s)%(x)s);" % locals() arcsinh = ArcSinh(upgrade_to_float, name='arcsinh') class Tanh(UnaryScalarOp): """ tanh(x) = sinh(x) / cosh(x) = (exp(2*x) - 1) / (exp(2*x) + 1). """ nfunc_spec = ('tanh', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.tanh will compute the result in # half-precision (float16), where we want float32. 
x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.tanh(x, sig='f') return np.tanh(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz * (1 - sqr(tanh(x))), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = tanh((%(cast)s)%(x)s);" % locals() tanh = Tanh(upgrade_to_float, name='tanh') class ArcTanh(UnaryScalarOp): nfunc_spec = ('arctanh', 1, 1) def impl(self, x): # If x is an int8 or uint8, numpy.arctanh will compute the result in # half-precision (float16), where we want float32. x_dtype = str(getattr(x, 'dtype', '')) if x_dtype in ('int8', 'uint8'): return np.arctanh(x, sig='f') return np.arctanh(x) def L_op(self, inputs, outputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: raise NotImplementedError() if outputs[0].type in discrete_types: if x.type in discrete_types: return [x.zeros_like(dtype=theano.config.floatX)] else: return [x.zeros_like()] return gz / (np.cast[x.type](1) - sqr(x)), def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: raise NotImplementedError('type not supported', type) cast = node.outputs[0].type.dtype_specs()[1] return "%(z)s = atanh((%(cast)s)%(x)s);" % locals() arctanh = ArcTanh(upgrade_to_float, name='arctanh') class Real(UnaryScalarOp): """ Extract the real coordinate of a complex number. """ # numpy.real(float32) return a view on the inputs. 
# nfunc_spec = ('real', 1, 1) def impl(self, x): return np.real(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout return [complex(gz, 0)] real = Real(real_out, name='real') class Imag(UnaryScalarOp): nfunc_spec = ('imag', 1, 1) def impl(self, x): return np.imag(x) def grad(self, inputs, gout): (x,) = inputs (gz,) = gout if x.type in complex_types: return [complex(0, gz)] elif x.type in float_types: return [second(x, 0)] else: return [x.zeros_like(dtype=theano.config.floatX)] imag = Imag(real_out, name='imag') class Angle(UnaryScalarOp): nfunc_spec = ('angle', 1, 1) def impl(self, x): return np.angle(x) def grad(self, inputs, gout): # y = x.imag # r = sqrt(y**2 + x.real**2) # g = y/r # if x == 0 and y == 0: # theta = 0 # elif x >= 0: # theta = numpy.arcsin(g) # else: # theta = -numpy.arcsin(g)+numpy.pi (c,) = inputs (gtheta,) = gout x = real(c) y = imag(c) r = abs(c) gr = -gtheta * y / (r ** 2 * sqrt(1 - (y / r) ** 2)) gx = gr * x / r gy = gr * y / r if c.type in complex_types: return [cast(complex(gx, gy), x.type.dtype)] elif c.type in float_types: return [cast(second(x, 0), x.type.dtype)] else: return [c.zeros_like(dtype=theano.config.floatX)] angle = Angle(specific_out(float64), name='angle') class Complex(BinaryScalarOp): @staticmethod def output_types_preference(x, y): if x in complex_types: raise TypeError(x) if y in complex_types: raise TypeError(y) up = Scalar.upcast(x, y) if up in ('float64', 'int64', 'uint64', 'int32', 'uint32'): return [complex128] else: return [complex64] def impl(self, x, y): return np.complex(x, y) def grad(self, inputs, gout): (x, y) = inputs (gz,) = gout return [cast(real(gz), x.type.dtype), cast(imag(gz), y.type.dtype)] complex = Complex(name='complex') class Conj(UnaryScalarOp): nfunc_spec = ('conj', 1, 1) def impl(self, x): return np.conj(x) def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs if node.inputs[0].type in complex_types: # For non complex types, the assignment below is a plain copy. raise NotImplementedError('type have no c code', node.inputs[0].type) return "%(z)s = %(x)s;" % locals() conj = Conj(same_out_min8, name='conj') class ComplexFromPolar(BinaryScalarOp): @staticmethod def output_types_preference(x, y): return Complex.output_types_preference(x, y) def impl(self, r, theta): if r < 0: raise ValueError('polar radius must be non-negative', r) x = r * np.cos(theta) y = r * np.sin(theta) if x.dtype == 'float32': return np.complex64(np.complex(x, y)) else: return np.complex128(np.complex(x, y)) def grad(self, inputs, gout): (r, theta) = inputs (gz,) = gout gr = gz * complex_from_polar(1, theta) gtheta = gz * complex_from_polar(r, -theta) return [gr, gtheta] complex_from_polar = ComplexFromPolar(name='complex_from_polar') class Composite(ScalarOp): """ Composite is an Op that takes a graph of scalar operations and produces C code for the whole graph. Its purpose is to implement loop fusion. Composite depends on all the Ops in its graph having C code. """ init_param = ('inputs', 'outputs') def __str__(self): if self.name is None: self.init_name() return self.name def make_new_inplace(self, output_types_preference=None, name=None): """ This Op's __init__ method does not take the same parameters as other scalar Ops, which breaks the insert_inplace_optimizer optimization. This method is a patch to work around that.
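It rebuilds the Op from the attributes listed in init_param and then applies the given output_types_preference and name to the clone.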
""" d = dict([(k, getattr(self, k)) for k in self.init_param]) out = self.__class__(**d) if name: out.name = name else: name = out.name super(Composite, out).__init__(output_types_preference, name) return out def init_c_code(self): """ Assemble the C code for this Composite Op. The result is assigned to `self._c_code`. """ # It was already called if hasattr(self, '_c_code'): return subd = dict(chain( ((e, "%%(i%i)s" % i) for i, e in enumerate(self.fgraph.inputs)), ((e, "%%(o%i)s" % i) for i, e in enumerate(self.fgraph.outputs)))) for var in self.fgraph.variables: if var.owner is None: if var not in self.fgraph.inputs: # This is an orphan if isinstance(var, Constant): subd[var] = var.type.c_literal(var.data) else: raise ValueError( "All orphans in the fgraph to Composite must" " be Constant instances.") elif (any(i.dtype == 'float16' for i in var.owner.inputs) or any(o.dtype == 'float16' for o in var.owner.outputs)): # flag for elemwise ops to check. self.inner_float16 = True _c_code = "{\n" self.nodenames = ["%(nodename)s_" + ('subnode%i' % j) for j, n in enumerate(self.fgraph.toposort())] i = 0 for j, node in enumerate(self.fgraph.toposort()): for output in node.outputs: if output not in subd: i += 1 name = "V%%(id)s_tmp%i" % i subd[output] = name _c_code += "%s %s;\n" % ( output.type.dtype_specs()[1], name) s = node.op.c_code( node, self.nodenames[j], [subd[input] for input in node.inputs], [subd[output] for output in node.outputs], dict(fail="%(fail)s", id="%%(id)s_%i" % j)) _c_code += s _c_code += "\n" _c_code += "}\n" self._c_code = _c_code def init_py_impls(self): """ Return a list of functions that compute each output of self. """ # In the case where the graph is a dag, but not a tree like: # add(*1 -> mul(x, y), *1) # We have an efficient way to build the executable (we build # and traverse each node only once). # But we don't have an efficient execution. We will execute # like a tree, so nodes that have more then 1 client will be # executed as many times as there number of clients. In the # example aboce, it will calculate *1 twice. Doing otherwise # imply making a complicated execution engine. # We need the fast creation of the executor as we always do it # even if we will use the c code. The Python implementation is # already slow, so it is not as much important to have a fast # execution there. memo = {} def compose_impl(r): if r in memo: return memo[r] if r in self.fgraph.inputs: idx = self.fgraph.inputs.index(r) def f(inputs): return inputs[idx] memo[r] = f return f elif r.owner is None: # in fgraph.orphans: def f(inputs): return r.data memo[r] = f return f node = r.owner producers = [compose_impl(input) for input in node.inputs] def f(inputs): return node.op.impl(*[p(inputs) for p in producers]) memo[r] = f return f self._impls = [compose_impl(r) for r in self.fgraph.outputs] def init_name(self): """ Return a readable string representation of self.fgraph. """ rval = self.name if rval is None: for i, r in enumerate(self.fgraph.inputs): r.name = 'i%i' % i for i, r in enumerate(self.fgraph.outputs): r.name = 'o%i' % i io = set(self.fgraph.inputs + self.fgraph.outputs) for i, r in enumerate(self.fgraph.variables): if r not in io and len(r.clients) > 1: r.name = 't%i' % i rval = "Composite{%s}" % ', '.join([pprint(output) for output in self.fgraph.outputs]) self.name = rval def init_fgraph(self): # The clone done by FunctionGraph is needed as we don't want # the fgraph to be set to the variable as we need to pickle # them for the cache of c module to work. 
fgraph = FunctionGraph(self.inputs, self.outputs) gof.MergeOptimizer().optimize(fgraph) for node in fgraph.apply_nodes: if not isinstance(node.op, ScalarOp): raise ValueError("The fgraph to Composite must be exclusively" " composed of ScalarOp instances.") self.fgraph = fgraph def __init__(self, inputs, outputs): # We need to clone the graph as sometimes its nodes already # contain a reference to an fgraph. As we want the Composite # to be picklable, we can't keep a reference to an fgraph. # Also, if there are Composite Ops in the inner graph, we want to # remove them. In that case, we do a more complicated clone # that will flatten the Composite. We don't need to do this # recursively, as the way the fusion optimizer works, we have # only 1 new Composite at the output each time. for i in inputs: assert i not in outputs # This isn't supported, use identity if len(outputs) > 1 or not any([isinstance(var.owner.op, Composite) for var in outputs]): # No inner Composite inputs, outputs = gof.graph.clone(inputs, outputs) else: # Inner Composite that we need to flatten assert len(outputs) == 1 # 1. Create a new graph from inputs up to the # Composite res = theano.compile.rebuild_collect_shared( inputs=inputs, outputs=outputs[0].owner.inputs, copy_inputs_over=False) # Clone also the inputs # 2. We continue this partial clone with the graph in # the inner Composite res2 = theano.compile.rebuild_collect_shared( inputs=outputs[0].owner.op.inputs, outputs=outputs[0].owner.op.outputs, replace=dict(izip(outputs[0].owner.op.inputs, res[1])) ) assert len(res2[1]) == len(outputs) assert len(res[0]) == len(inputs) assert res[0] != inputs inputs, outputs = res[0], res2[1] # The next assert is commented out just for speed # assert not any([isinstance(node.op, Composite) for node in # theano.gof.graph.ops(inputs, outputs)]) self.inputs = copy(inputs) self.outputs = copy(outputs) self.inputs_type = tuple([input.type for input in inputs]) self.outputs_type = tuple([output.type for output in outputs]) self.nin = len(inputs) self.nout = len(outputs) self.init_fgraph() # self.fgraph # Postpone the creation in case it isn't needed. # self.init_name() # self.name self.name = None self.prepare_node_called = set() def prepare_node(self, node, storage_map, compute_map, impl): if impl == 'py': self.init_py_impls() # self._impls if impl not in self.prepare_node_called: for n in theano.gof.graph.list_of_nodes(self.inputs, self.outputs): n.op.prepare_node(n, None, None, impl) self.prepare_node_called.add(impl) def clone_float32(self): # This will not modify the fgraph or the nodes new_ins, new_outs = composite_f32.apply(self.fgraph) return Composite(new_ins, new_outs) def output_types(self, input_types): if tuple(input_types) != self.inputs_type: raise TypeError("Wrong types for Composite. Expected %s, got %s." % (self.inputs_type, tuple(input_types))) return self.outputs_type def make_node(self, *inputs): if (tuple([i.type for i in self.inputs]) == tuple([i.type for i in inputs])): return super(Composite, self).make_node(*inputs) else: # Make a new op with the right input type. assert len(inputs) == self.nin res = theano.compile.rebuild_collect_shared( self.outputs, replace=dict(izip(self.inputs, inputs)), rebuild_strict=False) # After rebuild_collect_shared, the Variables in inputs # are not necessarily in the graph represented by res. # res[2][0] is a dict that maps from the original variable to the # cloned variable.
cloned_inputs = [res[2][0][i] for i in inputs] node = Composite(cloned_inputs, res[1]).make_node(*inputs) return node def perform(self, node, inputs, output_storage): for storage, impl in zip(output_storage, self._impls): storage[0] = impl(inputs) def impl(self, *inputs): output_storage = [[None] for i in xrange(self.nout)] self.perform(None, inputs, output_storage) ret = utils.to_return_values([storage[0] for storage in output_storage]) if self.nout > 1: ret = tuple(ret) return ret def grad(self, inputs, output_grads): raise NotImplementedError("grad is not implemented for Composite") def c_code(self, node, nodename, inames, onames, sub): self.init_c_code() d = dict(chain(izip(("i%i" % i for i in xrange(len(inames))), inames), izip(("o%i" % i for i in xrange(len(onames))), onames)), **sub) d['nodename'] = nodename if 'id' not in sub: # The use of a dummy id is safe as the code is in a separate block. # It won't generate conflicting variable name. d['id'] = '_DUMMY_ID_' return self._c_code % d def c_code_cache_version(self): rval = [3] for x in self.fgraph.toposort(): xv = x.op.c_code_cache_version() if xv: rval.append(xv) else: return () return tuple(rval) def c_support_code(self): rval = [] for subnode in self.fgraph.toposort(): try: rval.append(subnode.op.c_support_code().strip()) except gof.utils.MethodNotDefined: pass # remove duplicate code blocks return "\n".join(sorted(set(rval))) def c_support_code_apply(self, node, name): self.init_c_code() rval = [] for subnode, subnodename in zip(self.fgraph.toposort(), self.nodenames): try: subnode_support_code = subnode.op.c_support_code_apply( subnode, subnodename % dict(nodename=name)) if subnode_support_code: rval.append(subnode_support_code) except gof.utils.MethodNotDefined: pass # there should be no need to remove duplicate code blocks because # each block should have been specialized for the given nodename. # Any block that isn't specialized should be returned via # c_support_code instead of c_support_code_apply. return "\n".join(rval) def __eq__(self, other): if self is other: return True if (type(self) != type(other) or self.nin != other.nin or self.nout != other.nout): return False # see __hash__ for comment on why there is no mention of fgraph # or module cache key here. self.init_c_code() # self._c_code and self.nodenames other.init_c_code() return (self._c_code == other._c_code) def __hash__(self): self.init_c_code() # self._c_code and self.nodenames rval = hash((type(self), self.nin, self.nout, self._c_code)) # Note that in general, the configparser settings at the time # of code generation (__init__) affect the semantics of this Op. # This function assumes that all relevant info about the configparser # is embodied in _c_code. So the _c_code, rather than self.fgraph, # is the signature of the semantics of this Op. # _c_code is preserved through unpickling, so the Op will not change # semantics when it is reloaded with different configparser # settings. return rval def __getstate__(self): rval = dict(self.__dict__) rval.pop('_impls', None) rval.pop('prepare_node_called', None) del rval['fgraph'] return rval def __setstate__(self, d): self.__dict__.update(d) # We must call init to set fgraph and _impls again, as otherwise # self.perform will not work. 
self.prepare_node_called = set() self.init_fgraph() self.init_py_impls() class Compositef32(object): # This is a dict of scalar op classes that need special handling special = {} def apply(self, fgraph): mapping = {} topo = fgraph.toposort() for i in fgraph.inputs: if i.dtype == 'float16': mapping[i] = get_scalar_type('float32')() if hasattr(i.tag, 'test_value'): mapping[i].tag.test_value = i.tag.test_value else: mapping[i] = i for node in topo: # Patch up for constants for i in node.inputs: if i not in mapping: assert type(i) is ScalarConstant if i.type == float16: ni = ScalarConstant(float32, i.data) else: ni = i mapping[i] = ni if type(node.op) in self.special: self.special[type(node.op)](node, mapping) continue new_node = node.clone_with_new_inputs( [mapping[inp] for inp in node.inputs], strict=False) # make sure we don't produce any float16. assert not any(o.dtype == 'float16' for o in new_node.outputs) for o, no in zip(node.outputs, new_node.outputs): mapping[o] = no new_ins = [mapping[inp] for inp in fgraph.inputs] new_outs = [mapping[out] for out in fgraph.outputs] return new_ins, new_outs composite_f32 = Compositef32() def handle_cast(node, mapping): inp = mapping[node.inputs[0]] out = node.outputs[0] node_ok = False if node.op.o_type == float16: if node.inputs[0].type == float32: # cast f32 -> f16, remove mapping[out] = inp return else: # cast to f16, convert to f32 new_out = cast(inp, 'float32') # change the node for the following if node = new_out.owner mapping[out] = new_out node_ok = True if node.inputs[0].type == float16: if node.op.o_type == inp.type: # cast f16 to new input type, remove mapping[out] = inp return if not node_ok: new_node = node.clone_with_new_inputs([inp], strict=False) mapping[out] = new_node.outputs[0] Compositef32.special[Cast] = handle_cast def handle_composite(node, mapping): new_op = node.op.clone_float32() new_outs = new_op(*[mapping[i] for i in node.inputs], return_list=True) assert len(new_outs) == len(node.outputs) for o, no in zip(node.outputs, new_outs): mapping[o] = no Compositef32.special[Composite] = handle_composite
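# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the Theano API, never called by the
# library): a minimal, hand-built example of what the Composite Op above
# does, assuming the float64 scalar type and the add/mul scalar ops defined
# earlier in this module.  In normal use, Composite instances are created by
# the elemwise fusion optimizer, not by users.  The helper name below is
# hypothetical.
def _composite_usage_sketch():
    # Build a small scalar graph: out = x * y + y.
    x = float64('x')
    y = float64('y')
    out = add(mul(x, y), y)
    # Fuse the whole graph into a single scalar Op.
    comp = Composite([x, y], [out])
    # The Python implementations are built lazily (see prepare_node), so
    # request them explicitly before calling impl().
    comp.init_py_impls()
    # Evaluate the fused op on plain Python floats; 2.0 * 3.0 + 3.0 == 9.0.
    return comp.impl(2.0, 3.0)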