# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.


"""Provide a way to create indexes to text files.

Classes:
 - Index            Dictionary-like class used to store index information.
 - _ShelveIndex     An Index class based on the shelve module.
 - _InMemoryIndex   An in-memory Index class.

"""

import os
import array
import shelve

# Prefer the C-accelerated pickle on Python 2; Python 3 folds it into
# the standard pickle module, so fall back to that.
try:
    import cPickle as pickle  # Only available under Python 2
except ImportError:
    import pickle  # Python 3

class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The open shelf is stored on the instance as ``data``.  A version
    marker is kept in the shelf under the key ``'__version'`` so that an
    index written in a different format is rejected when it is opened.
    """

    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf named indexname, creating it when needed.

        Arguments:
         - indexname - file name (without extension) given to shelve.open.
         - truncate - if true, throw away any existing index and start
           from an empty one.

        Raises IOError if an existing shelf carries no version marker,
        or a marker that differs from this class's version.
        """
        dict.__init__(self)
        existing = None
        if truncate:
            # In python 1.52 and before, dumbdbm (under shelve)
            # doesn't clear the old database, so remove its files by hand.
            for suffix in ('.dir', '.dat', '.bak'):
                filename = indexname + suffix
                if os.path.exists(filename):
                    os.unlink(filename)
        else:
            try:
                existing = shelve.open(indexname, flag='r')
            except Exception:
                # No database exists (or it cannot be read); the exact
                # error type depends on the dbm backend, so any failure
                # here means "create a new shelf" below.
                existing = None

        if existing is None:
            self.data = shelve.open(indexname, flag='n')
            self.data[self.__version_key] = self.__version
            return

        # Bind the shelf before validating it so that __del__ still
        # closes it if the version check raises.
        self.data = existing
        # Check to make sure the database is the correct version.
        version = self.data.get(self.__version_key, None)
        if version is None:
            raise IOError("Unrecognized index format")
        elif version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))

    def __del__(self):
        """Close the underlying shelf if one was opened."""
        # __init__ may have failed before 'data' was assigned.
        if 'data' in self.__dict__:
            self.data.close()

class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    The index behaves like a dictionary.  Its contents are read from
    the file named indexname when the object is created, and written
    back to that file when the object is destroyed if anything changed.
    """

    # File Format:
    # version
    # key value
    # [...]

    __version = 3
    __version_key = '__version'
    # Class-level default so __del__ is safe on an instance whose
    # __init__ never ran (or failed early).
    __changed = 0

    def __init__(self, indexname, truncate=None):
        """Load the index stored in indexname, if that file exists.

        Arguments:
         - indexname - path of the file backing this index.
         - truncate - if true, delete an existing file first so the
           index starts out empty.

        Raises IOError if the file was written with a different version.
        """
        super(_InMemoryIndex, self).__init__()
        self._indexname = indexname
        self.__changed = 0     # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            with open(indexname) as handle:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle:
                    key, value = line.split()
                    key, value = self._toobj(key), self._toobj(value)
                    self[key] = value
                # Loading goes through __setitem__, which marks the index
                # dirty; it matches the file at this point, so reset.
                self.__changed = 0

    def update(self, dict):
        """Merge the entries of dict into the index and mark it changed."""
        self.__changed = 1
        # The parameter shadows the builtin dict, so reach the parent
        # implementation through super() rather than dict.update.
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store value under key and mark the index changed."""
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Remove key and mark the index changed."""
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        """Remove every entry and mark the index changed."""
        self.__changed = 1
        dict.clear(self)

    def __del__(self):
        """Write the index back to its file if it has been modified."""
        if self.__changed:
            with open(self._indexname, 'w') as handle:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n" %
                                 (self._tostr(key), self._tostr(value)))
            # The file is now current; a second __del__ call need not
            # rewrite it.
            self.__changed = 0

    def _tostr(self, obj):
        """Return obj pickled and encoded as comma-separated integers."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its (signed) integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things,
        # but it's relatively fast.
        s = pickle.dumps(obj)
        intlist = array.array('b', s)
        return ','.join(str(i) for i in intlist)

    def _toobj(self, str):
        """Return the object encoded by _tostr in the string str."""
        packed = array.array('b', [int(i) for i in str.split(',')])
        # Rebuild the raw pickle bytes.  chr() cannot do this on
        # Python 3 (negative values, and pickle needs bytes, not text).
        # tobytes() is the Python 3 name; tostring() the Python 2 one.
        to_bytes = getattr(packed, 'tobytes', None) or packed.tostring
        # NOTE(review): this unpickles whatever is in the index file;
        # only open index files that come from a trusted source.
        return pickle.loads(to_bytes())


Index = _InMemoryIndex