# dataset.py """Module for Dataset class Overview of Dicom object model: Dataset(derived class of Python's dict class) contains DataElement instances (DataElement is a class with tag, VR, value) the value can be a Sequence instance (Sequence is derived from Python's list), or just a regular value like a number, string, etc., or a list of regular values, e.g. a 3d coordinate Sequence's are a list of Datasets (note recursive nature here) """ # # Copyright (c) 2008-2013 Darcy Mason # This file is part of pydicom, released under a modified MIT license. # See the file license.txt included with this distribution, also # available at http://pydicom.googlecode.com # import sys from sys import byteorder sys_is_little_endian = (byteorder == 'little') import logging logger = logging.getLogger('pydicom') import inspect # for __dir__ from dicom.charset import default_encoding, convert_encodings from dicom.datadict import dictionaryVR from dicom.datadict import tag_for_name, all_names_for_tag from dicom.tag import Tag, BaseTag from dicom.dataelem import DataElement, DataElement_from_raw, RawDataElement from dicom.UID import NotCompressedPixelTransferSyntaxes from dicom.tagtools import tag_in_exception import os.path import io import dicom # for write_file import dicom.charset have_numpy = True try: import numpy except: have_numpy = False stat_available = True try: from os import stat except: stat_available = False class PropertyError(Exception): """For AttributeErrors caught in a property, so do not go to __getattr__""" # http://docs.python.org/release/3.1.3/tutorial/errors.html#tut-userexceptions pass class Dataset(dict): """A collection (dictionary) of Dicom `DataElement` instances. Example of two ways to retrieve or set values: 1. dataset[0x10, 0x10].value --> patient's name 2. dataset.PatientName --> patient's name Example (2) uses DICOM "keywords", defined starting in 2011 standard. PatientName is not actually a member of the object, but unknown member requests are checked against the DICOM dictionary. If the name matches a DicomDictionary descriptive string, the corresponding tag is used to look up or set the `DataElement` instance's value. :attribute indent_chars: for string display, the characters used to indent nested Data Elements (e.g. sequence items). Default is three spaces. """ indent_chars = " " def __init__(self, *args, **kwargs): self._parent_encoding = kwargs.get('parent_encoding', default_encoding) dict.__init__(self, *args) def add(self, data_element): """Equivalent to dataset[data_element.tag] = data_element.""" self[data_element.tag] = data_element def add_new(self, tag, VR, value): """Create a new DataElement instance and add it to this Dataset.""" data_element = DataElement(tag, VR, value) # use data_element.tag since DataElement verified it self[data_element.tag] = data_element def data_element(self, name): """Return the full data_element instance for the given descriptive name :param name: a DICOM keyword :returns: a DataElement instance in this dataset with the given name If the tag for that name is not found, returns None """ tag = tag_for_name(name) if tag: return self[tag] return None def __contains__(self, name): """Extend dict.__contains__() to handle DICOM keywords. This is called for code like: ``if 'SliceLocation' in dataset``. """ if isinstance(name, (str, unicode)): tag = tag_for_name(name) else: try: tag = Tag(name) except: return False if tag: return dict.__contains__(self, tag) else: return dict.__contains__(self, name) # will no doubt raise an exception def decode(self): """Apply character set decoding to all data elements. See DICOM PS3.5-2008 6.1.1. """ # Find specific character set. 'ISO_IR 6' is default # May be multi-valued, but let dicom.charset handle all logic on that dicom_character_set = self._character_set # Shortcut to the decode function in dicom.charset decode_data_element = dicom.charset.decode # Callback for walk(), to decode the chr strings if necessary # This simply calls the dicom.charset.decode function def decode_callback(ds, data_element): if data_element.VR == 'SQ': [dset.decode() for dset in data_element.value] else: decode_data_element(data_element, dicom_character_set) self.walk(decode_callback, recursive=False) def __delattr__(self, name): """Intercept requests to delete an attribute by name, e.g. del ds.name If name is a DICOM keyword, then delete the corresponding tag and data_element. Else, delete an instance (python) attribute as any other class would do """ # First check if a valid DICOM keyword and if we have that data element tag = tag_for_name(name) if tag and tag in self: dict.__delitem__(self, tag) # direct to dict as we know we have key # If not a DICOM name in this dataset, check for regular instance name # can't do delete directly, that will call __delattr__ again elif name in self.__dict__: del self.__dict__[name] # Not found, raise an error in same style as python does else: raise AttributeError(name) def __delitem__(self, key): """Intercept requests to delete an attribute by key, e.g. del ds[tag]""" # Assume is a standard tag (for speed in common case) try: dict.__delitem__(self, key) # If not a standard tag, than convert to Tag and try again except KeyError: tag = Tag(key) dict.__delitem__(self, tag) def __dir__(self): """Give a list of attributes available in the dataset List of attributes is used, for example, in auto-completion in editors or command-line environments. """ # Force zip object into a list in case of python3. Also backwards # compatible meths = set(list(zip( *inspect.getmembers(Dataset, inspect.isroutine)))[0]) props = set(list(zip( *inspect.getmembers(Dataset, inspect.isdatadescriptor)))[0]) dicom_names = set(self.dir()) alldir = sorted(props | meths | dicom_names) return alldir def dir(self, *filters): """Return an alphabetical list of data_element keywords in the dataset. Intended mainly for use in interactive Python sessions. :param filters: zero or more string arguments to the function. Used for case-insensitive match to any part of the DICOM name. :returns: All data_element names in this dataset matching the filters. If no filters, return all DICOM keywords in the dataset """ allnames = [] for tag, data_element in self.items(): allnames.extend(all_names_for_tag(tag)) # remove blanks - tags without valid names (e.g. private tags) allnames = [x for x in allnames if x] # Store found names in a dict, so duplicate names appear only once matches = {} for filter_ in filters: filter_ = filter_.lower() match = [x for x in allnames if x.lower().find(filter_) != -1] matches.update(dict([(x, 1) for x in match])) if filters: names = sorted(matches.keys()) return names else: return sorted(allnames) def get(self, key, default=None): """Extend dict.get() to handle DICOM keywords""" if isinstance(key, (str, unicode)): try: return getattr(self, key) except AttributeError: return default else: # is not a string, try to make it into a tag and then hand it # off to the underlying dict if not isinstance(key, BaseTag): try: key = Tag(key) except: raise TypeError("Dataset.get key must be a string or tag") try: return_val = self.__getitem__(key) except KeyError: return_val = default return return_val def __getattr__(self, name): """Intercept requests for unknown Dataset python-attribute names. If the name matches a Dicom keyword, return the value for the data_element with the corresponding tag. """ # __getattr__ only called if instance cannot find name in self.__dict__ # So, if name is not a dicom string, then is an error tag = tag_for_name(name) if tag is None: raise AttributeError("Dataset does not have attribute " "'{0:s}'.".format(name)) tag = Tag(tag) if tag not in self: raise AttributeError("Dataset does not have attribute " "'{0:s}'.".format(name)) else: # do have that dicom data_element return self[tag].value @property def _character_set(self): """ :return: """ char_set = self.get('SpecificCharacterSet', None) if not char_set: char_set = self._parent_encoding else: char_set = convert_encodings(char_set) return char_set def __getitem__(self, key): """Operator for dataset[key] request.""" tag = Tag(key) data_elem = dict.__getitem__(self, tag) if isinstance(data_elem, DataElement): return data_elem elif isinstance(data_elem, tuple): # If a deferred read, then go get the value now if data_elem.value is None: from dicom.filereader import read_deferred_data_element data_elem = read_deferred_data_element(self.fileobj_type, self.filename, self.timestamp, data_elem) if tag != (0x08, 0x05): character_set = self._character_set else: character_set = default_encoding # Not converted from raw form read from file yet; do so now self[tag] = DataElement_from_raw(data_elem, character_set) return dict.__getitem__(self, tag) def get_item(self, key): """Return the raw data element if possible. It will be raw if the user has never accessed the value, or set their own value. Note if the data element is a deferred-read element, then it is read and converted before being returned """ tag = Tag(key) data_elem = dict.__getitem__(self, tag) # If a deferred read, return using __getitem__ to read and convert it if isinstance(data_elem, tuple) and data_elem.value is None: return self[key] return data_elem def group_dataset(self, group): """Return a Dataset containing only data_elements of a certain group. :param group: the group part of a dicom (group, element) tag. :returns: a dataset instance containing data elements of the group specified """ ds = Dataset() ds.update(dict([(tag, data_element) for tag, data_element in self.items() if tag.group == group])) return ds def __iter__(self): """Method to iterate through the dataset, returning data_elements. e.g.: for data_element in dataset: do_something... The data_elements are returned in DICOM order, i.e. in increasing order by tag value. Sequence items are returned as a single data_element; it is up to the calling code to recurse into the Sequence items if desired """ # Note this is different than the underlying dict class, # which returns the key of the key:value mapping. # Here the value is returned (but data_element.tag has the key) taglist = sorted(self.keys()) for tag in taglist: yield self[tag] def _pixel_data_numpy(self): """Return a NumPy array of the pixel data. NumPy is a numerical package for python. It is used if available. :raises TypeError: if no pixel data in this dataset. :raises ImportError: if cannot import numpy. """ if 'PixelData' not in self: raise TypeError("No pixel data found in this dataset.") if not have_numpy: msg = "The Numpy package is required to use pixel_array, and numpy could not be imported.\n" raise ImportError(msg) # determine the type used for the array need_byteswap = (self.is_little_endian != sys_is_little_endian) # Make NumPy format code, e.g. "uint16", "int32" etc # from two pieces of info: # self.PixelRepresentation -- 0 for unsigned, 1 for signed; # self.BitsAllocated -- 8, 16, or 32 format_str = '%sint%d' % (('u', '')[self.PixelRepresentation], self.BitsAllocated) try: numpy_format = numpy.dtype(format_str) except TypeError: msg = ("Data type not understood by NumPy: " "format='%s', PixelRepresentation=%d, BitsAllocated=%d") raise TypeError(msg % (numpy_format, self.PixelRepresentation, self.BitsAllocated)) # Have correct Numpy format, so create the NumPy array arr = numpy.fromstring(self.PixelData, numpy_format) # XXX byte swap - may later handle this in read_file!!? if need_byteswap: arr.byteswap(True) # True means swap in-place, don't make a new copy # Note the following reshape operations return a new *view* onto arr, but don't copy the data if 'NumberOfFrames' in self and self.NumberOfFrames > 1: if self.SamplesPerPixel > 1: arr = arr.reshape(self.SamplesPerPixel, self.NumberOfFrames, self.Rows, self.Columns) else: arr = arr.reshape(self.NumberOfFrames, self.Rows, self.Columns) else: if self.SamplesPerPixel > 1: if self.BitsAllocated == 8: arr = arr.reshape(self.SamplesPerPixel, self.Rows, self.Columns) else: raise NotImplementedError("This code only handles SamplesPerPixel > 1 if Bits Allocated = 8") else: arr = arr.reshape(self.Rows, self.Columns) return arr # Use by pixel_array property def _get_pixel_array(self): # Check if pixel data is in a form we know how to make into an array # XXX uses file_meta here, should really only be thus for FileDataset if self.file_meta.TransferSyntaxUID not in NotCompressedPixelTransferSyntaxes: raise NotImplementedError("Pixel Data is compressed in a format pydicom does not yet handle. Cannot return array") # Check if already have converted to a NumPy array # Also check if self.PixelData has changed. If so, get new NumPy array already_have = True if not hasattr(self, "_pixel_array"): already_have = False elif self._pixel_id != id(self.PixelData): already_have = False if not already_have: self._pixel_array = self._pixel_data_numpy() self._pixel_id = id(self.PixelData) # is this guaranteed to work if memory is re-used?? return self._pixel_array @property def pixel_array(self): """Return the pixel data as a NumPy array""" try: return self._get_pixel_array() except AttributeError: t, e, tb = sys.exc_info() raise PropertyError("AttributeError in pixel_array property: " + e.args[0]), None, tb # Format strings spec'd according to python string formatting options # See http://docs.python.org/library/stdtypes.html#string-formatting-operations default_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s" default_sequence_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s" def formatted_lines(self, element_format=default_element_format, sequence_element_format=default_sequence_element_format, indent_format=None): """A generator to give back a formatted string representing each line one at a time. Example: for line in dataset.formatted_lines("%(name)s=%(repval)s", "SQ:%(name)s=%(repval)s"): print(line) See the source code for default values which illustrate some of the names that can be used in the format strings indent_format -- not used in current version. Placeholder for future functionality. """ for data_element in self.iterall(): # Get all the attributes possible for this data element (e.g. # gets descriptive text name too) # This is the dictionary of names that can be used in the format string elem_dict = dict([(x, getattr(data_element, x)() if callable(getattr(data_element, x)) else getattr(data_element, x)) for x in dir(data_element) if not x.startswith("_")]) if data_element.VR == "SQ": yield sequence_element_format % elem_dict else: yield element_format % elem_dict def _pretty_str(self, indent=0, top_level_only=False): """Return a string of the data_elements in this dataset, with indented levels. This private method is called by the __str__() method for handling print statements or str(dataset), and the __repr__() method. It is also used by top(), which is the reason for the top_level_only flag. This function recurses, with increasing indentation levels. """ strings = [] indent_str = self.indent_chars * indent nextindent_str = self.indent_chars * (indent + 1) for data_element in self: with tag_in_exception(data_element.tag): if data_element.VR == "SQ": # a sequence strings.append(indent_str + str(data_element.tag) + " %s %i item(s) ---- " % (data_element.description(), len(data_element.value))) if not top_level_only: for dataset in data_element.value: strings.append(dataset._pretty_str(indent + 1)) strings.append(nextindent_str + "---------") else: strings.append(indent_str + repr(data_element)) return "\n".join(strings) def remove_private_tags(self): """Remove all Dicom private tags in this dataset and those contained within.""" def RemoveCallback(dataset, data_element): """Internal method to use as callback to walk() method.""" if data_element.tag.is_private: # can't del self[tag] - won't be right dataset on recursion del dataset[data_element.tag] self.walk(RemoveCallback) def save_as(self, filename, write_like_original=True): """Write the dataset to a file. :param filename: full path and filename to save the file to :write_like_original: see dicom.filewriter.write_file for info on this parameter. """ dicom.write_file(filename, self, write_like_original) def __setattr__(self, name, value): """Intercept any attempts to set a value for an instance attribute. If name is a dicom descriptive string (cleaned with CleanName), then set the corresponding tag and data_element. Else, set an instance (python) attribute as any other class would do. """ tag = tag_for_name(name) if tag is not None: # successfully mapped name to a tag if tag not in self: # don't have this tag yet->create the data_element instance VR = dictionaryVR(tag) data_element = DataElement(tag, VR, value) else: # already have this data_element, just changing its value data_element = self[tag] data_element.value = value # Now have data_element - store it in this dict self[tag] = data_element else: # name not in dicom dictionary - setting a non-dicom instance attribute # XXX note if user mis-spells a dicom data_element - no error!!! self.__dict__[name] = value def __setitem__(self, key, value): """Operator for dataset[key]=value. Check consistency, and deal with private tags""" if not isinstance(value, (DataElement, RawDataElement)): # ok if is subclass, e.g. DeferredDataElement raise TypeError("Dataset contents must be DataElement instances.\n" "To set a data_element value use data_element.value=val") tag = Tag(value.tag) if key != tag: raise ValueError("data_element.tag must match the dictionary key") data_element = value if tag.is_private: # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved logger.debug("Setting private tag %r" % tag) private_block = tag.elem >> 8 private_creator_tag = Tag(tag.group, private_block) if private_creator_tag in self and tag != private_creator_tag: if isinstance(data_element, RawDataElement): data_element = DataElement_from_raw(data_element, self._character_set) data_element.private_creator = self[private_creator_tag].value dict.__setitem__(self, tag, data_element) def __str__(self): """Handle str(dataset).""" return self._pretty_str() def top(self): """Show the DICOM tags, but only the top level; do not recurse into Sequences""" return self._pretty_str(top_level_only=True) def trait_names(self): """Return a list of valid names for auto-completion code Used in IPython, so that data element names can be found and offered for autocompletion on the IPython command line """ return dir(self) # only valid python >=2.6, else use self.__dir__() def update(self, dictionary): """Extend dict.update() to handle DICOM keywords.""" for key, value in dictionary.items(): if isinstance(key, (str, unicode)): setattr(self, key, value) else: self[Tag(key)] = value def iterall(self): """Iterate through the dataset, yielding all data elements. Unlike Dataset.__iter__, this *does* recurse into sequences, and so returns all data elements as if the file were "flattened". """ for data_element in self: yield data_element if data_element.VR == "SQ": sequence = data_element.value for dataset in sequence: for elem in dataset.iterall(): yield elem def walk(self, callback, recursive=True): """Call the given function for all dataset data_elements (recurses). Visit all data_elements, recurse into sequences and their datasets (if specified), The callback function is called for each data_element (including SQ element). Can be used to perform an operation on certain types of data_elements. E.g., `remove_private_tags`() finds all private tags and deletes them. :param callback: a callable taking two arguments: a dataset, and a data_element belonging to that dataset. :param recursive: a boolean indicating whether to recurse into Sequences `DataElement`s will come back in DICOM order (by increasing tag number within their dataset) """ taglist = sorted(self.keys()) for tag in taglist: with tag_in_exception(tag): data_element = self[tag] callback(self, data_element) # self = this Dataset # 'tag in self' below needed in case callback deleted data_element if recursive and tag in self and data_element.VR == "SQ": sequence = data_element.value for dataset in sequence: dataset.walk(callback) __repr__ = __str__ class FileDataset(Dataset): def __init__(self, filename_or_obj, dataset, preamble=None, file_meta=None, is_implicit_VR=True, is_little_endian=True): """Initialize a dataset read from a DICOM file :param filename: full path and filename to the file. Use None if is a BytesIO. :param dataset: some form of dictionary, usually a Dataset from read_dataset() :param preamble: the 128-byte DICOM preamble :param file_meta: the file meta info dataset, as returned by _read_file_meta, or an empty dataset if no file meta information is in the file :param is_implicit_VR: True if implicit VR transfer syntax used; False if explicit VR. Default is True. :param is_little_endian: True if little-endian transfer syntax used; False if big-endian. Default is True. """ Dataset.__init__(self, dataset) self.preamble = preamble self.file_meta = file_meta self.is_implicit_VR = is_implicit_VR self.is_little_endian = is_little_endian if isinstance(filename_or_obj, basestring): self.filename = filename_or_obj self.fileobj_type = open elif isinstance(filename_or_obj, io.BufferedReader): self.filename = filename_or_obj.name # This is the appropriate constructor for io.BufferedReader self.fileobj_type = open else: self.fileobj_type = filename_or_obj.__class__ # use __class__ python <2.7?; http://docs.python.org/reference/datamodel.html if getattr(filename_or_obj, "name", False): self.filename = filename_or_obj.name elif getattr(filename_or_obj, "filename", False): # gzip python <2.7? self.filename = filename_or_obj.filename else: self.filename = None # e.g. came from BytesIO or something file-like self.timestamp = None if stat_available and self.filename and os.path.exists(self.filename): statinfo = stat(self.filename) self.timestamp = statinfo.st_mtime