def segmentSieve(rList0, rList1, fraction=0.5):
	"""MolMovDB style segmenting.  Use sieve fit to find core.
	Everything else is in the second segment."""
	# SieveFit checks that the two molecule have equivalent residues
	import SieveFit
	coreList0, coreList1 = SieveFit.fitResidues(rList0, rList1, fraction)
	def splitCore(rList, coreSet):
		core = []
		others = []
		for r in rList:
			if r in coreSet:
				core.append(r)
			else:
				others.append(r)
		return core, others

	# First we split our residue list by chain identifier
	# For each chain, we split into a core segment and an other segment
	coreSet0 = set(coreList0)
	coreSet1 = set(coreList1)
	segments = list()
	start = 0
	chain = rList0[0].id.chainId
	for i in range(1, len(rList0)):
		r0 = rList0[i]
		if r0.id.chainId != chain:
			core0, others0 = splitCore(rList0[start:i], coreSet0)
			core1, others1 = splitCore(rList1[start:i], coreSet1)
			segments.append((core0, core1))
			segments.append((others0, others1))
			start = i
	if start < len(rList0):
		core0, others0 = splitCore(rList0[start:], coreSet0)
		core1, others1 = splitCore(rList1[start:], coreSet1)
		segments.append((core0, core1))
		segments.append((others0, others1))
	return segments

def segmentHingeExact(m0, m1, fraction=0.5):

        # Split by chain
        cr0, cr1 = residuesByChain(m0.residues), residuesByChain(m1.residues)
        if len(cr0) != len(cr1):
		raise ValueError("models have different number of chains")

        # If chain ids all match then pair same ids, otherwise use order from file.
        if set(cid for cid, rlist in cr0) == set(cid for cid, rlist in cr1):
                cr0.sort()
                cr1.sort()

        parts = []
	atomMap = {}
	unusedAtoms = []
        for (cid0,r0list), (cid1,r1list) in zip(cr0, cr1):
                if len(r0list) != len(r1list):
                        raise ValueError("Chains %s and %s have different number of residues" % (cid0, cid1))
                curCat = None
                curRList0 = None
                curRList1 = None
                for r0, r1 in zip(r0list, r1list):
                        if not shareAtoms(r0, r1, atomMap, unusedAtoms):
                                raise ValueError("residues do not share atoms")
                        #
                        # Split residues based on surface category (in m0)
                        #
                        if r0.atoms[0].surfaceCategory == curCat:
                                curRList0.append(r0)
                                curRList1.append(r1)
                        else:
                                curRList0 = [ r0 ]
                                curRList1 = [ r1 ]
                                parts.append((curRList0, curRList1))
                                curCat = r0.atoms[0].surfaceCategory

	#
	# Split each part on hinges and collate results
	#
	segments = []
	for rList0, rList1 in parts:
		segments.extend(segmentHingeResidues(rList0, rList1, fraction))
	return segments, atomMap, [], unusedAtoms

def residuesByChain(residues):
        cres = {}
        cids = []
        for r in residues:
                cid = r.id.chainId
                if cid in cres:
                        cres[cid].append(r)
                else:
                        cres[cid] = [r]
                        cids.append(cid)
        return [(cid, cres[cid]) for cid in cids]

def segmentHingeApproximate(m0, m1, fraction=0.5, matrix="BLOSUM-62"):
	#
	# Get the chains from each model.  If they do not have the
	# same number of chains, we give up.  Otherwise, we assume
	# that the chains should be matched in the same order.
	#
	m0seqs = m0.sequences()
	m1seqs = m1.sequences()
	if len(m0seqs) != len(m1seqs):
		raise ValueError("models have different number of chains")
	resCount0 = len(m0.residues)
	matchCount0 = 0
	for seq0 in m0seqs:
		matchCount0 += len(seq0.residues)
	print "Aligning %d of %d residues from molecule %s" % (
			matchCount0, resCount0, m0.name)
	resCount1 = len(m1.residues)
	matchCount1 = 0
	for seq1 in m1seqs:
		matchCount1 += len(seq1.residues)
	print "Aligning %d of %d residues from molecule %s" % (
			matchCount1, resCount1, m1.name)

	#
	# Any residue that does not appear in chains are unused
	#
	maybe = set()
	for seq0 in m0seqs:
		maybe.update(seq0.residues)
	unusedResidues = [ r0 for r0 in m0.residues if r0 not in maybe ]

	#
	# Try to find the best matches for sequences.
	# If both models have chains with the same ids, assume that
	# chains with the same ids match.  Otherwise, assume the chains
	# match in input order.
	#
	m0map = dict((s.chain, s) for s in m0seqs)
	m1map = dict((s.chain, s) for s in m1seqs)
	if set(m0map.keys()) == set(m1map.keys()):
		seqPairs = [ (m0map[k], m1map[k]) for k in m0map.keys() ]
	else:
		seqPairs = zip(m0seqs, m1seqs)

	#
	# For each chain pair, we align them using MatchMaker to get
	# the residue correspondences.
	#
	import MatchMaker
	ksdsspCache = set([m0, m1])
	parts = []
	atomMap = {}
	unusedAtoms = []
	matched = 0
	matrices = [
		MatchMaker.defaults[MatchMaker.MATRIX],
		"Nucleic",
	]
	for seq0, seq1 in seqPairs:
		for matrix in matrices:
			if (MatchMaker.matrixCompatible(seq0, matrix)
			and MatchMaker.matrixCompatible(seq1, matrix)):
				break
		else:
			continue
		score, gapped0, gapped1 = MatchMaker.align(seq0, seq1,
				matrix, "nw",
				MatchMaker.defaults[MatchMaker.GAP_OPEN],
				MatchMaker.defaults[MatchMaker.GAP_EXTEND],
				ksdsspCache)
		rList0 = []
		rList1 = []
		for pos in range(len(gapped0)):
			i0 = gapped0.gapped2ungapped(pos)
			if i0 is None:
				continue
			r0 = gapped0.residues[i0]
			if r0 is None:
				continue
			i1 = gapped1.gapped2ungapped(pos)
			if i1 is None:
				unusedResidues.append(r0)
				continue
			r1 = gapped1.residues[i1]
			if r1 is None:
				unusedResidues.append(r0)
				continue
			if not shareAtoms(r0, r1, atomMap, unusedAtoms):
				unusedResidues.append(r0)
				continue
			rList0.append(r0)
			rList1.append(r1)
			matched += 1
		if rList0:
			parts.append((rList0, rList1))

	#
	# Split each part on hinges and collate results
	#
	segments = []
	for rList0, rList1 in parts:
		segments.extend(segmentHingeResidues(rList0, rList1, fraction))

	#
	# Identify any residues that were not in sequences but have
	# similar connectivity (e.g., metals and ligands) and share
	# some common atoms
	#
	segmentMap = dict()
	residueMap = dict()
	for sIndex, s in enumerate(segments):
		for r0, r1 in zip(s[0], s[1]):
			residueMap[r1] = r0
			segmentMap[r0] = sIndex
	used = set()
	for seq0 in m0seqs:
		used.update(seq0.residues)
	m0candidates = [ r0 for r0 in m0.residues if r0 not in used ]
	keyMap = dict()
	for r0 in m0candidates:
		neighbors = _getConnectedResidues(r0)
		if not neighbors:
			continue
		nlist = list(neighbors)
		nlist.sort()
		keyMap[tuple(nlist)] = r0
	used = set()
	for seq1 in m1seqs:
		used.update(seq1.residues)
	m1candidates = [ r1 for r1 in m1.residues if r1 not in used ]
	for r1 in m1candidates:
		neighbors = _getConnectedResidues(r1)
		if not neighbors:
			continue
		try:
			nlist = [ residueMap[r] for r in neighbors ]
		except KeyError:
			pass
		else:
			nlist.sort()
			key = tuple(nlist)
			try:
				r0 = keyMap[key]
				sIndex = segmentMap[nlist[0]]
			except KeyError:
				pass
			else:
				if shareAtoms(r0, r1, atomMap, unusedAtoms):
					s0, s1 = segments[sIndex]
					segments[sIndex] = (s0 + (r0,),
								s1 + (r1,))
					unusedResidues.remove(r0)
					matched += 1

	#
	# Finally, finished
	#
	print "Matched %d residues in %d segments" % (matched, len(segments))
	return segments, atomMap, unusedResidues, unusedAtoms

def segmentHingeResidues(rList0, rList1, fraction):
	#
	# Find matching set of residues
	#
	segments = segmentSieve(rList0, rList1, fraction)

	#
	# Find hinges and split molecules at hinge residues
	# The Interpolate module wants a set of segments
	# which are 2-tuples.  Each element of the 2-tuple
	# is a tuple of residues.
	#
	from Hinge import findHinges, splitOnHinges
	hingeIndices = findHinges(rList0, rList1, segments)
	segmentsStart = [ tuple(l)
			for l in splitOnHinges(hingeIndices, rList0) ]
	segmentsEnd = [ tuple(l)
			for l in splitOnHinges(hingeIndices, rList1) ]
	segments = zip(segmentsStart, segmentsEnd)
	return segments

def shareAtoms(r0, r1, atomMap, unusedAtoms):
	# We start by finding the atom connected to the
	# previous residue.  Failing that, we want the
	# atom connected to the next residue.  Failing that,
	# we take an arbitrary atom.
	from util import residuePrimaryAtoms
	m0 = r0.molecule
	before = m0.residueBefore(r0)
	after = m0.residueAfter(r0)
	r0PrimaryAtoms = residuePrimaryAtoms(r0)
	startAtom = None
	for a0 in r0PrimaryAtoms:
		if startAtom is None:
			startAtom = a0
		for na in a0.primaryNeighbors():
			if na.residue is before:
				startAtom = a0
				break
			elif na.residue is after:
				startAtom = a0
	# From this starting atom, we do a breadth-first
	# search for an atom with a matching atom name in r1
	matched = {}
	visited = set()
	todo = [ startAtom ]
	paired = set()
	expand = []
	from util import findPrimaryAtom
	while todo:
		a0 = todo.pop(0)
		a1 = findPrimaryAtom(r1, a0.name)
		if a1 is None:
			# No match, so we put all our neighboring
			# atoms on the search list
			for na in a0.primaryNeighbors():
				if (na not in visited
				and na.residue is a0.residue):
					todo.append(na)
			visited.add(a0)
		else:
			# Found a starter atom pair
			matched[a0] = a1
			expand.append((a0, a1))
			break
	while expand:
		a0, a1 = expand.pop(0)
		if a0 in visited:
			continue
		visited.add(a0)
		# a0 and a1 are matched, now we want to see
		# if any of their neighbors match
		for na0 in a0.primaryNeighbors():
			if na0 in visited or na0.residue is not a0.residue:
				continue
			for na1 in a1.primaryNeighbors():
				if na1 in paired or na1.name != na0.name:
					continue
				matched[na0] = na1
				expand.append((na0, na1))
				paired.add(na1)
				break
	# Now we look at our results
	if not matched:
		# Note that we do not update unusedAtoms since
		# the residues do not match and will be deleted
		# as a whole.
		return False

	# Next we check for atoms we have not visited and see if
	# we can pair them
	for a0 in r0PrimaryAtoms:
		if a0 in visited:
			continue
		a1 = findPrimaryAtom(r1, a0.name)
		if a1 is not None and a1 not in paired:
			matched[a0] = a1
			paired.add(a1)

	atomMap.update(matched)
	unmatched = [ a0 for a0 in r0PrimaryAtoms if a0 not in matched ]
	unusedAtoms.extend(unmatched)
	return True

def _getConnectedResidues(r):
	neighborResidues = set()
	for a in r.atoms:
		for na in a.neighbors:
			if na.residue is not r:
				neighborResidues.add(na.residue)
		for pb in a.pseudoBonds:
			na = pb.otherAtom(a)
			if na.residue is not r:
				neighborResidues.add(na.residue)
	return neighborResidues

if __name__ == "chimeraOpenSandbox":
	import chimera
	#m0, m1 = chimera.openModels.open("testdata/1dmo.pdb")
	m0 = chimera.openModels.open("testdata/4cln-processed.pdb")[0]
	m1 = chimera.openModels.open("testdata/2bbm-matched.pdb")[0]
	segments = segmentSieve(m0.residues, m1.residues)
	print len(segments), "cores"
	for segment in segments:
		print "Core:", len(segment[0]), "residues"