# -----------------------------------------------------------------------------
# Write volume data in hdf5 format.
#
# Example HDF5 format written by Chimera.
#
# /Chimera
#  /image1
#    chimera_map_version 1
#    chimera_version "1.2422"
#    name "centriole"
#    step (1.2, 1.2, 1.2)
#    origin (-123.4, -522, 34.5)
#    cell_angles (90.0, 90.0, 90.0)
#    rotation_axis (0.0, 0.0, 1.0)
#    rotation_angle 45.0
#    symmetries (((0,-1,0,0),(1,0,0,0),(0,0,1,0)),...)
#    data_zyx (3d array of uint8 (123,542,82))
#    data_yzx (3d array of uint8 (123,542,82), alternate chunk shape)
#    data_zyx_2 (3d array of uint8 (61,271,41))
#        subsample_spacing (2, 2, 2)
#    (more subsampled or alternate chunkshape versions of same data)
#
# Names "Chimera", "chimera_version", "name", "step", "origin", "cell_angles",
# "rotation_axis", "rotation_angle", "symmetries", "subsample_spacing"
# are fixed while "image1", "data_zyx", "data_yzx" and "data_zyx_2" can
# be any name.
#
# In the example "Chimera" and "image1" are HDF groups,
# "chimera_version", "name", "step", "origin", "cell_angles",
# "rotation_axis", "rotation_angle", "symmetries" are group
# attributes, "data_zyx", "data_yzx" and "data_zyx_2" are hdf datasets
# (arrays), and "subsample_step" is a dataset attribute.
#
# All data sets within the group represent the same data, such as optional
# subsampled arrays or alternate chunk shape for efficient disk access.
#
# Cell angles need not be included if they are 90,90,90.  They are
# included for handling crystallographic density maps.  An identity
# rotation need not be included.  The rotation angle is in degrees.
# Symmetries need not be included.
#
# The file is saved with the Python PyTables modules which includes
# additional attributes "VERSION", "CLASS", "TITLE", "PYTABLES_FORMAT_VERSION".
#
def write_grid_as_chimera_map(grid_data, path, options = {}, progress = None):

    settings = {
        'min_subsample_elements': 2**17,
        'chunk_shapes': ['zyx'],
        'chunk_size': 2**16,       # 64 Kbytes
        'append': False,
        'compress': False,
        'anisotropic':True,
        }
    settings.update(options)

    from VolumeData import Grid_Data
    if isinstance(grid_data, Grid_Data):
        data_sets = [grid_data]
    else:
        data_sets = grid_data

    if settings['append']:      mode = 'a'
    else:                       mode = 'w'
    import tables
    h5file = tables.openFile(path, mode = mode)
    if progress:
        progress.close_on_cancel(h5file)

    if '/Chimera' in h5file:
        cg = h5file.getNode('/Chimera')
    else:
        cg = h5file.createGroup(h5file.root, 'Chimera')

    ioffset = next_suffix_number(cg, 'image')
    for i, d in enumerate(data_sets):
        g = h5file.createGroup(cg, 'image%d' % (i+ioffset))
        write_grid_data(h5file, d, g, settings, progress)

    h5file.close()

# -----------------------------------------------------------------------------
# Used to produce new group name when adding maps to an existing hdf file.
#
def next_suffix_number(group, prefix):

    imax = 0
    for gname in group._v_groups.keys():
        if gname.startswith(prefix):
            suffix = gname[len(prefix):]
            try:
                imax = max(imax, int(suffix))
            except ValueError:
                pass
    return imax + 1

# -----------------------------------------------------------------------------
# Write volume data in Chimera hdf5 format.
#
def write_grid_data(h5file, grid_data, g, settings, progress):

    if progress:
        from os.path import basename
        wpath = basename(h5file.filename)
        progress.operation = 'Writing %s to %s' % (grid_data.name, wpath)
        progress.array_size(grid_data.size, grid_data.value_type.itemsize)

    g._v_attrs.chimera_map_version = 1

    from chimera.version import release
    g._v_attrs.chimera_version = release        # string

    if grid_data.name:
        g._v_attrs.name = data_name(grid_data)

    # Need to use numpy arrays for attributes otherwise pytables 2.0 uses
    # python pickle strings in the hdf file.  Want hdf file to be readable
    # by other software without Python.
    from numpy import array, float32
    g._v_attrs.origin = array(grid_data.origin, float32)
    g._v_attrs.step = array(grid_data.step, float32)
    if grid_data.cell_angles != (90,90,90):
        g._v_attrs.cell_angles = array(grid_data.cell_angles, float32)
    if grid_data.rotation != ((1,0,0),(0,1,0),(0,0,1)):
        import Matrix
        axis, angle = Matrix.rotation_axis_angle(grid_data.rotation)
        g._v_attrs.rotation_axis = array(axis, float32)
        g._v_attrs.rotation_angle = array(angle, float32)
    if len(grid_data.symmetries) > 0:
        g._v_attrs.symmetries = array(grid_data.symmetries, float32)

    # Determine data type.
    import tables
    atom = tables.Atom.from_dtype(grid_data.value_type)

    # Create data array, subsample arrays, and alternate chunk shape arrays.
    arrays = make_arrays(h5file, g, grid_data.size, grid_data.step, atom, settings)

    # Write values to primary and subsample arrays.
    isz,jsz,ksz = grid_data.size
    for k in range(ksz):
        if progress:
            progress.plane(k)
        # Read a single plane at a time to handle data sets that do not
        # fit in memory.
        m = grid_data.matrix((0,0,k), (isz,jsz,1))
        for (istep,jstep,kstep), a in arrays:
            if kstep == 1 or k % kstep == 0:
                a[k/kstep,:,:] = m[0,::jstep,::istep]

    # TODO: Use subsample arrays if available.
    # TODO: Optimize read write depending on chunk shapes.

# -----------------------------------------------------------------------------
# Determine name used to distinguish multiple maps in file.
#
def data_name(grid_data):

    return grid_data.name                       # often is file name

# -----------------------------------------------------------------------------
#
def make_arrays(h5file, g, size, spacing, atom, settings):

    chunk_elements = settings['chunk_size'] / atom.itemsize
    chunk_shapes = settings['chunk_shapes']
    min_subsample_elements = settings['min_subsample_elements']

    if 'compress' in settings and settings['compress']:
        from tables import Filters
        filters = Filters(complevel = 9)
    else:
        filters = None
    
    arrays = []
    isize, jsize, ksize = size
    shape = (ksize,jsize,isize)
    cshapes = {}    # Avoid duplicating chunk shapes
    for csname in chunk_shapes:
        cshape = chunk_shape(shape, csname, chunk_elements)
        if not cshape in cshapes:
            a = h5file.createCArray(g, 'data_' + csname, atom, shape,
                                    chunkshape = cshape, filters = filters)
            arrays.append(((1,1,1),a))
            cshapes[cshape] = True

    # Make subsample arrays.
    aniso = settings.get('anisotropic')
    istep, jstep, kstep = subsample_step((1,1,1), spacing, aniso)
    from numpy import array, int32
    while (isize >= istep and jsize >= jstep and ksize >= kstep and
           (isize/istep)*(jsize/jstep)*(ksize/kstep) >= min_subsample_elements):
        shape = (1+(ksize-1)/kstep, 1+(jsize-1)/jstep, 1+(isize-1)/istep)
        cshapes = {}    # Avoid duplicating chunk shapes
        for csname in chunk_shapes:
            cshape = chunk_shape(shape, csname, chunk_elements)
            if not cshape in cshapes:
                a = h5file.createCArray(g, 'data_%s_%d%d%d' % (csname,istep,jstep,kstep), atom,
                                        shape, chunkshape = cshape,
                                        filters = filters)
                a._v_attrs.subsample_spacing = array((istep,jstep,kstep), int32)
                arrays.append(((istep,jstep,kstep), a))
                cshapes[cshape] = True
        istep, jstep, kstep = subsample_step((istep,jstep,kstep), spacing, aniso)

    return arrays

# -----------------------------------------------------------------------------
#
def subsample_step(step, spacing, aniso):
    if aniso:
        s = min(st*sp for st,sp in zip(step,spacing))
        sstep = tuple((2*st if st*sp < 2*s else st) for st,sp in zip(step,spacing))
    else:
        sstep = tuple(2*st for st in step)
    return sstep

# -----------------------------------------------------------------------------
# Name corresponds to slow-medium-fast axes.
#
def chunk_shape(shape, name, chunk_elements):

    if name in ('zyx', 'zxy', 'yxz', 'yzx', 'xzy', 'xyz'):
        axes = {'x':2, 'y':1, 'z':0}
        smf_axes = [axes[a] for a in name]      # slow, medium, fast axes
        fms_axes = reversed(smf_axes)
        cshape = [1,1,1]
        csize = 1
        for a in fms_axes:
            # Avoid chunk shapes like (1,n-1,n) for n^3 data set that make
            # the file size twice as large as needed.
            s = min(shape[a], max(1, int(chunk_elements/csize)))
            c = (shape[a] + s - 1) / s  # chunks needed to cover
            ms = (shape[a] + c - 1) / c # min size to cover with c chunks.
            cshape[a] = ms
            csize *= cshape[a]
        cshape = tuple(cshape)
        return cshape

    return None                 # Use hdf default chunk shape.