#!/usr/bin/env python
# FindImages.py
#   Copyright (C) 2006 CCLRC, Graeme Winter
#
#   This code is distributed under the BSD license, a copy of which is
#   included in the root directory of this package.
#
# 9th June 2006
#
# A set of routines for finding images and the like based on file names.
# This includes all of the appropriate handling for templates, directories
# and the like.
#
# 15/JUN/06
#
# Also routines for grouping sets of images together into sweeps based on
# the file names and the information in image headers.
#
# FIXME 24/AUG/06 this needs to be renamed to something a little more obvious
#                 than FindImages - perhaps ImageExpert?
# FIXME 04/OCT/06 when the image name is all numbers like 999_1_001 need to
#                 assume that the extension is the number, BEFORE testing any
#                 of the other possibilities...
# FIXME 04/OCT/10 when we have images 200-299 (say) don't merge the 2 with
#                 the template - you end up with batch 0.
#

import sys
import os
import re
import string
import math
import copy

# XIA2_ROOT must be defined, and is added to the module search path if it is
# not already there - presumably so that the Handlers package imported below
# can be found.
if 'XIA2_ROOT' not in os.environ:
  raise RuntimeError('XIA2_ROOT not defined')

if os.environ['XIA2_ROOT'] not in sys.path:
  sys.path.append(os.environ['XIA2_ROOT'])

from Handlers.Streams import Debug

# N.B. these patterns are written to be matched against the REVERSED file
# name, so the captured groups come out reversed and in the order
# (digits, prefix) for the first pattern and (extension, digits, prefix) for
# the other two.
#
# joiners[j] is the separator that patterns[j] consumes between the prefix
# and the digits - it has to be put back when the template is assembled.

joiners = ['.', '_', '']

patterns = [
    r'([0-9]{2,12})\.(.*)',
    r'(.*)\.([0-9]{2,12})_(.*)',
    r'(.*)\.([0-9]{2,12})(.*)',
    ]

compiled_patterns = list(map(re.compile, patterns))

def template_regex(filename):
  '''Work out the template and image number for an image file name.

  The file name is reversed and tried against each of compiled_patterns in
  turn; the first pattern which matches decides which run of digits is the
  image number. N.B. the original intent was that the image index is the
  last set of digits found in the file name.

  Returns a tuple (template, number) where template is the file name with
  every digit of the image number replaced by '#' and number is the image
  number as an integer.

  Raises RuntimeError if none of the patterns match.'''

  rfilename = filename[::-1]

  for j, cp in enumerate(compiled_patterns):
    match = cp.match(rfilename)
    if match is None:
      continue

    # the match was made on the reversed name, so turn every group back
    # around before using it
    groups = [group[::-1] for group in match.groups()]

    if len(groups) == 3:
      exten, digits, prefix = groups
      exten = '.' + exten
    else:
      # first pattern: the digits are the extension, nothing follows them
      digits, prefix = groups
      exten = ''

    # joiners[j] restores the separator which pattern j consumed between
    # the prefix and the digits
    template = prefix + joiners[j] + '#' * len(digits) + exten

    return template, int(digits)

  raise RuntimeError('template not recognised for %s' % filename)

def work_template_regex():
  '''Self test for template_regex: check that a handful of example file
  names give the expected templates. Fails with an AssertionError if any
  of them do not.'''

  expected_templates = {
      'foo_bar_001.img':'foo_bar_###.img',
      'foo_bar001.img':'foo_bar###.img',
      'foo_bar_1.8A_001.img':'foo_bar_1.8A_###.img',
      'foo_bar.001':'foo_bar.###',
      'foo_bar_001.img1000':'foo_bar_###.img1000',
      'foo_bar_00001.img':'foo_bar_#####.img'
      }

  for name, expected in expected_templates.items():
    # only the template is checked here, not the image number
    assert template_regex(name)[0] == expected

def image2template(filename):
  '''Return the template (image number digits replaced with #) for this
  file name, as worked out by template_regex.'''

  template, _number = template_regex(filename)
  return template

def image2template_old(filename):
  '''Return a template to match this filename.

  The file name is tested against a fixed list of regular expressions, in
  order, and the first which matches is used: the 2 - 12 digits it captures
  are replaced with the same number of # characters.

  Raises RuntimeError if the file name already contains a # or if none of
  the expressions match.'''

  # check that the file name doesn't contain anything mysterious
  if '#' in filename:
    raise RuntimeError('# characters in filename')

  # the patterns in the order I want to test them, each paired with the
  # format string to put the file name back together

  pattern_formats = [(r'([^\.]*)\.([0-9]{2,12})\Z', '%s.%s%s'),
                     (r'([^\.]*)\.([0-9]{2,12})(.*)', '%s.%s%s'),
                     (r'(.*)_([0-9]{2,12})\.(.*)', '%s_%s.%s'),
                     (r'(.*?)([0-9]{2,12})\.(.*)', '%s%s.%s')]

  for pattern, fmt in pattern_formats:
    match = re.match(pattern, filename)

    if match is None:
      continue

    prefix = match.group(1)
    number = match.group(2)

    # the first pattern has no third group, i.e. nothing after the digits
    if match.re.groups > 2:
      exten = match.group(3)
    else:
      exten = ''

    return fmt % (prefix, '#' * len(number), exten)

  raise RuntimeError('filename %s not understood as a template' % filename)

def image2image(filename):
  '''Return the image number (as an integer) for this file name, as worked
  out by template_regex.'''

  _template, number = template_regex(filename)
  return number

def image2image_old(filename):
  '''Return an integer for the template to match this filename.

  The file name is tested against a fixed list of regular expressions, in
  order, and the digits captured by the first which matches are returned
  as an integer.

  Raises RuntimeError if the file name contains a # or if none of the
  expressions match.'''

  # check that the file name doesn't contain anything mysterious
  if '#' in filename:
    raise RuntimeError('# characters in filename')

  # the patterns in the order I want to test them. N.B. every pattern
  # insists on at least one digit: allowing an empty number meant that a
  # name with no image number ended in int('') and a ValueError, not the
  # RuntimeError below.

  pattern_keys = [r'([^\.]*)\.([0-9]+)\Z',
                  r'([^\.]*)\.([0-9]+)(.*)',
                  r'(.*)_([0-9]+)\.(.*)',
                  r'(.*?)([0-9]+)\.(.*)']

  for pattern in pattern_keys:
    match = re.match(pattern, filename)

    if match:
      return int(match.group(2))

  raise RuntimeError('filename %s not understood as a template' % filename)

def image2template_directory(filename):
  '''Split an image name into a (template, directory) pair.'''

  # a name with no directory part is taken to be in the current working
  # directory
  directory = os.path.dirname(filename) or os.getcwd()

  template = image2template(os.path.basename(filename))

  return template, directory

def find_matching_images(template, directory):
  '''Find images which match the input template in the directory
  provided.

  Returns a sorted list of the image numbers (integers) of the files in
  directory whose whole name matches the template.'''

  # to turn the template to a regular expression want to replace
  # however many #'s with EXACTLY the same number of [0-9] tokens,
  # e.g. ### -> ([0-9]{3})

  # change 30/may/2008 - now escape the template in this search to cope with
  # file templates with special characters in them, such as "+" -
  # fix to a problem reported by Joel B.

  length = template.count('#')
  regexp_text = re.escape(template).replace(
      '\\#' * length, '([0-9]{%d})' % length)

  # match() only anchors the start of the name - anchor the end as well so
  # that e.g. foo_001.img.bak is not counted as an image for foo_###.img
  regexp = re.compile(regexp_text + r'\Z')

  # FIXME there are faster ways of determining this - by generating the lists
  # of possible images. That said, the code for this is now in dxtbx...

  matches = [regexp.match(f) for f in os.listdir(directory)]

  return sorted(int(match.group(1)) for match in matches if match)

def template_directory_number2image(template, directory, number):
  '''Construct the full path to an image from the template, directory
  and image number.

  The run of # characters in the template is replaced with the image
  number, zero padded to the same width, and the result is joined on to
  the directory.

  Raises RuntimeError if the number has more digits than the template has
  # characters.'''

  # FIXME this duplicates the code in template_number2image

  length = template.count('#')

  # check that the number will fit in the template - the biggest number
  # which fits in N digits is 10^N - 1

  if number > 10 ** length - 1:
    raise RuntimeError('number too big for template')

  # the number part of the template, zero padded to the number of #'s
  digits = '%0*d' % (length, number)

  # construct the full image name
  return os.path.join(directory, template.replace('#' * length, digits))

def template_number2image(template, number):
  '''Construct an image name from the template and image number.

  The run of # characters in the template is replaced with the image
  number, zero padded to the same width.

  Raises RuntimeError if the number has more digits than the template has
  # characters.'''

  length = template.count('#')

  # check that the number will fit in the template - the biggest number
  # which fits in N digits is 10^N - 1

  if number > 10 ** length - 1:
    raise RuntimeError('number too big for template')

  # the number part of the template, zero padded to the number of #'s
  digits = '%0*d' % (length, number)

  return template.replace('#' * length, digits)

def headers2sweep_ids(header_dict):
  '''Return the id of every sweep found in the headers, where the id of a
  sweep is the lowest image number it contains.'''

  return [min(sweep['images']) for sweep in headers2sweeps(header_dict)]

def _sweep_epoch(header, image):
  '''Return the epoch to use for this image, without modifying the header.'''

  # observation: in RIGAKU SATURN data sets the epoch is the same for
  # all images => add the IMAGE NUMBER to this as a workaround if
  # that format.

  if 'rigaku saturn' in header['detector_class']:
    return header['epoch'] + image

  return header['epoch']

def _new_sweep(header, image):
  '''Start a sweep summary from a private copy of the header of its first
  image.'''

  # deep copy so that extending the sweep never writes to the caller's header
  sweep = copy.deepcopy(header)
  sweep['images'] = [image]
  sweep['epoch'] = _sweep_epoch(header, image)
  sweep['collect_start'] = sweep['epoch']
  sweep['collect_end'] = sweep['epoch']

  return sweep

def headers2sweeps(header_dict):
  '''Parse a dictionary of headers to produce a list of summaries.

  header_dict maps image number to a header dictionary, from which the
  keys phi_start, phi_end, wavelength, distance, epoch and detector_class
  are read. Consecutive image numbers with the same wavelength and distance,
  where each image starts in phi where the previous one ended, are grouped
  into one sweep.

  Returns a list of sweep dictionaries in image order. Each is a copy of
  the header of the first image in the sweep, with phi_end taken from the
  last image and with the extra keys images (list of image numbers),
  collect_start and collect_end (epochs of the first and last images).

  N.B. images with phi_end == phi_start are DELETED from header_dict as a
  side effect. The remaining headers are not modified.'''

  # SCI-545 - remove still images from sweeps

  zap = [i for i in header_dict
         if math.fabs(header_dict[i]['phi_end'] -
                      header_dict[i]['phi_start']) == 0]

  Debug.write('Removing %d apparently still images' % len(zap))

  for z in zap:
    del header_dict[z]

  images = sorted(header_dict)

  if not images:
    return []

  sweeps = []

  current_sweep = _new_sweep(header_dict[images[0]], images[0])

  for i in images[1:]:
    header = header_dict[i]

    # worked out here rather than written back to the header: adding the
    # image number in place meant that it was added again on every call
    epoch = _sweep_epoch(header, i)

    # if wavelength the same and distance the same and this image
    # follows in phi from the previous chappie then this is the
    # next frame in the sweep. otherwise it is the first frame in
    # a new sweep.

    delta_lambda = math.fabs(
        header['wavelength'] - current_sweep['wavelength'])
    delta_distance = math.fabs(
        header['distance'] - current_sweep['distance'])
    delta_phi = math.fabs(
        header['phi_start'] - current_sweep['phi_end']) % 360.0

    if delta_lambda < 0.0001 and \
           delta_distance < 0.01 and \
           min(delta_phi, 360.0 - delta_phi) < 0.01 and \
           i == current_sweep['images'][-1] + 1:
      # this is another image in the sweep
      current_sweep['images'].append(i)
      current_sweep['phi_end'] = header['phi_end']
      current_sweep['collect_end'] = epoch
    else:
      Debug.write('Image %d starts a new sweep' % i)
      sweeps.append(current_sweep)
      current_sweep = _new_sweep(header, i)

  sweeps.append(current_sweep)

  return sweeps

def common_prefix(strings):
  '''Find a common prefix among the list of strings. May return an empty
  string, and always does so for an empty list.'''

  # os.path.commonprefix compares character by character (it knows nothing
  # about path components) which is exactly what is wanted here
  return os.path.commonprefix(strings)

def ensure_no_batches_numbered_zero(template, images, offset):
  '''Working in collaboration with digest_template, ensure that none of
  the images end up being numbered 0, and if they do try to add last digit of
  template section. Finally, if this extra character is not a digit raise
  an exception.

  While the smallest image number is 0 the last character before the #'s
  in the template is turned back into a # and its value is added on to
  every image number and taken off the offset.

  Returns the (template, images, offset) tuple, which is the input
  unchanged if there are no images or no image is numbered 0 or less.

  Raises RuntimeError if an image is still numbered 0 and there is no
  digit left in front of the #'s to move.'''

  if not images or min(images) > 0:
    return template, images, offset

  prefix = template.split('#')[0]
  suffix = template.split('#')[-1]
  hashes = template.count('#')

  while min(images) == 0:
    # N.B. an empty prefix has to be reported in the same way as a prefix
    # which does not end in a digit: callers catch the RuntimeError
    if not prefix or prefix[-1] not in string.digits:
      raise RuntimeError('image 0 found matching %s' % template)

    # the digit being moved sits one place above the current #'s
    add = int(prefix[-1]) * 10 ** hashes
    offset -= add
    hashes += 1
    prefix = prefix[:-1]
    images = [add + i for i in images]

  template = '%s%s%s' % (prefix, '#' * hashes, suffix)

  return template, images, offset

def digest_template(template, images):
  '''Digest the template and image numbers to copy as much of the
  common characters in the numbers as possible to the template to
  give smaller image numbers.

  Returns a (template, images, offset) tuple: the template with the common
  leading digits filled in, the correspondingly reduced image numbers and
  the offset which was taken off them. If an image ends up numbered 0 and
  ensure_no_batches_numbered_zero cannot renumber it, image 0 is thrown
  away.'''

  if not images:
    return template, images, 0

  length = template.count('#')

  number_format = '%%0%dd' % length

  strings = [number_format % i for i in images]

  # always leave at least one digit behind for the image number: with a
  # single image (or all numbers the same) the common prefix is the whole
  # number, and digesting all of it leaves int('') below
  shortest = min(len(s) for s in strings)
  prefix = common_prefix(strings)[:shortest - 1]
  offset = 0

  if prefix:
    offset = int(prefix + '0' * (length - len(prefix)))
    template = template.replace(len(prefix) * '#', prefix, 1)
    images = [int(s[len(prefix):]) for s in strings]

  try:
    template, images, offset = ensure_no_batches_numbered_zero(
        template, images, offset)
  except RuntimeError:
    Debug.write('Throwing away image 0 from template %s' % template)
    # drop the zero by value - it is only first if the images were sorted
    images = [i for i in images if i != 0]
    if images:
      template, images, offset = ensure_no_batches_numbered_zero(
          template, images, offset)

  return template, images, offset

if __name__ == '__main__':

  # run as a script: exercise template_regex against the example file names
  work_template_regex()