#! /usr/local/bin/python2.7

# Romain M. Wolf, NIBR Basel, December 2013
# with revisions by Pawel Janowski & Jason Swails, Rutgers U., Feb. 2014

#  Copyright (c) 2013, Novartis Institutes for BioMedical Research Inc.
#  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Novartis Institutes for BioMedical Research Inc.
#       nor the names of its contributors may be used to endorse or promote
#       products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

__version__ = "1.0"
__date__ = "March 2014"

# PDB analyzer to prepare protein(-ligand) PDB files for Amber simulations.

import os, sys
from optparse import OptionParser
from math import sqrt

import signal
def sigint_handler(*args, **kwargs):
  print >> sys.stderr, "Interrupt signal caught. Exiting"
  sys.exit(1)

signal.signal(signal.SIGINT, sigint_handler)

# Global constants
RESPROT = ('ALA', 'ARG', 'ASN', 'ASP',
           'CYS', 'GLN', 'GLU', 'GLY',
           'HIS', 'ILE', 'LEU', 'LYS',
           'MET', 'PHE', 'PRO', 'SER',
           'THR', 'TRP', 'TYR', 'VAL',
           'HID', 'HIE', 'HIN', 'HIP',
           'CYX', 'ASH', 'GLH', 'LYH',
           'ACE', 'NME', 'GL4', 'AS4',
           'WAT','HOH')

#=============================================
def pdb_read(pdbin, noter):
#=============================================
# only records starting with the following strings are kept...
  if noter:
    ACCEPTED =  ('ATOM','HETATM')
  else:
    ACCEPTED =  ('ATOM','HETATM', 'TER', 'MODEL', 'ENDMDL')
# records starting with the following strings are considered 'dividers'
# and they are cleaned for the rest of the line...
  DIVIDERS =  ('TER', 'MODEL', 'ENDMDL')

  if pdbin == 'stdin':
    records = sys.stdin
  else:
    records = open(pdbin, 'r')
  print >> sys.stderr, "\n=================================================="
  print >> sys.stderr, "Summary of pdb4amber for file %s"%pdbin
  print >> sys.stderr, "=================================================="
  '''
  This PDB reader first splits PDB lines (records) into individual (named) fields,
  then later re-assembles the fields in records. This might be done more elegantly,
  but we keep this scheme for now for clarity (given the messy PDB file format)!

    record_type     0    atom_number     1    blank1           2
    atom_name       3    alt_loc_ind     4    residue_name     5
    blank2          6    chain_id        7    residue_number   8
    insertion_code  9    blank3         10    x               11
    y               12   z              13    occupancy       14
    bfactor         15   blank4         16    element         17
    charge          18

  '''
  record_type = []; atom_number = []; blank1 = []
  atom_name = []; alt_loc_ind = []; residue_name = []; blank2 = []
  chain_id = []; residue_number = []; insertion_code = []; blank3 = []
  x = []; y = []; z = []; occupancy = []
  bfactor = []; blank4 = []; element = []; charge = []
# keep everything in 'ACCEPTED'
  lines = records.readlines()
  for line in lines:
    # make all lines 80 characters long (fill up with blanks if necessary)
    # so that the line parser will not fail on shorter lines...
    line = (line.rstrip() + (80-len(line)) * ' ')
    if line[0:6].rstrip() not in ACCEPTED:
      #~ print >> sys.stderr, '%-6sk' %line[0:6].rstrip()
      continue
# make clean divider lines without additional stuff that might hurt later
    elif line[0:6].rstrip() in DIVIDERS:
      line = line[0:6].rstrip()
      line = (line + (80-len(line)) * ' ')
    else:
      pass
# split the line into records
    record_type.append('%-6s' % line[0:6])
    atom_number.append(line[6:11])
    blank1.append(line[11:12])
    atom_name.append(line[12:16])
    alt_loc_ind.append(line[16:17]); residue_name.append(line[17:20])
    blank2.append(line[20:21]);
    chain_id.append(line[21:22])
    residue_number.append(line[22:26])
    insertion_code.append(line[26:27])
    blank3.append(line[27:30])
    x.append(line[30:38]); y.append(line[38:46]); z.append(line[46:54])
    occupancy.append(line[54:60]); bfactor.append(line[60:66])
    blank4.append(line[66:76]); element.append(line[76:78])
    charge.append(line[78:80])
  insert_resnums = []; insert_resnames = []; chains = [];
  recordlist = []
  for i, record in enumerate(record_type):
# determine insertion code
    if insertion_code[i] != ' ' and residue_number[i]+insertion_code[i] not in insert_resnums:
      insert_resnums.append(residue_number[i]+insertion_code[i])
      insert_resnames.append(residue_name[i])
# forget insertion code
      insertion_code[i] = ' '
# determine chain id and record it, if not yet found before
    if chain_id[i] != ' ' and chain_id[i] not in chains:
      chains.append(chain_id[i])
    record = [record_type[i], atom_number[i], blank1[i], atom_name[i], alt_loc_ind[i],
                  residue_name[i], blank2[i], chain_id[i], residue_number[i],
                  insertion_code[i], blank3[i], x[i], y[i], z[i],
                  occupancy[i], bfactor[i], blank4[i], element[i], charge[i]]
# append the accepted record to the overlall record list
    recordlist.append(record)
# report findings so far to the screen
  if chains:
    print >> sys.stderr, "\n----------Chains"
    print >> sys.stderr, "The following (original) chains have been found:"
    for chain in chains:
      print >> sys.stderr, chain
  if insert_resnums:
    print >> sys.stderr, "\n----------Insertions"
    print >> sys.stderr, "The following (original-number) residues were considered as insertions:"
    print >> sys.stderr, "They will be renumbered 'normally' in the final 1-N sequence." 
    for i in range(0,len(insert_resnums)):
      print >> sys.stderr, "%s%s"%(insert_resnames[i],(insert_resnums[i]))

  return(recordlist)

#==================================================
def pdb_write(recordlist, filename, cnct=''):
#==================================================
# uses a record list as created in pdb_read and writes it out to the filename
# using the format below
  if filename == 'stdout':
    pdbout = sys.stdout
  else:  
    pdbout  = open(filename, 'w')
  format = "%6s%5s%1s%4s%1s%3s%1s%1s%4s%1s%3s%8s%8s%8s%6s%6s%10s%2s%2s\n"
  for i, record in enumerate(recordlist):
    pdbout.write(format % tuple(record))
  pdbout.write(cnct)
  pdbout.write('END'+(77 * ' ')+'\n')  
  if pdbout != sys.stdout:
    pdbout.close()

#==================================================
def prot_only(recordlist, filename):
#==================================================
# this strips any residues not recogized by Amber libraries...
# in a personalized Amber installation with additional libraries, 
# you might consider extending this list
  global RESPROT
  protlist=[]
  for record in recordlist:
    if record[5] not in RESPROT:
      continue
    else:
      protlist.append(record)
  return(protlist)

#==================================================
def remove_hydrogens(recordlist):
#==================================================
  nohlist = []

  for record in recordlist:
    if record[3][0] == 'H' or record[3][1] == 'H':
      continue
    else:
      nohlist.append(record)

# return the record list with all hydrogens removed
  return(nohlist)

#========================================
def remove_water(recordlist, filename):
#========================================
# removes all water molecules of option -d was specified
  drylist = []; waterlist = []; nwaters = 0
  is_prev_water = 0
# format = "%6s%5s%1s%4s%1s%3s%1s%1s%4s%1s%3s%8s%8s%8s%6s%6s%10s%2s%2s\n"
# watpdb = open(filename+'_water.pdb', 'w')
  for record in recordlist:
# if previous record was water, remove TER card
    if is_prev_water and record[0]=='TER   ':
      is_prev_water=0      
# if oxygen, then count water
    elif (record[5] == 'HOH' or record[5] == 'WAT') and 'O' in record[3]:
      nwaters +=1
      waterlist.append(record)
      is_prev_water=1
#     watpdb.write(format % tuple(record))
      continue
# if not oxygen, just remove, but do not count, since this is probably hydrogen
    elif  record[5] == 'HOH' or record[5] == 'WAT':
      is_prev_water=1
      continue
    else:
      drylist.append(record)
      is_prev_water=0
 
# report the water removal to the screen
  print >> sys.stderr, "\n---------- Water"
  print >> sys.stderr, "%d water molecules have been removed"%nwaters
  print >> sys.stderr, "and stored in the file %s_water.pdb"%filename
# return the dry record list with all water removed
  pdb_write(waterlist, filename+'_water.pdb')
  return(drylist)


#========================================
def remove_altloc(recordlist):
#========================================
  noaltlist = []
  altloc_resnum = []; altloc_resname = []
  for record in recordlist:
# we accept only altlocs 'A' and '1'
    if record[4] != ' ' and record[4] != 'A' and record[4] != '1':
      if record[8] not in altloc_resnum:
        altloc_resnum.append(record[8])
        altloc_resname.append(record[5])
      continue

    else:
      record[4] = ' '
      noaltlist.append(record)
  if altloc_resnum:
    print >> sys.stderr, "\n---------- Alternate Locations (Original Residues!)"
    print >> sys.stderr, "The following residues had alternate locations:"

    for i, rname in enumerate(altloc_resname):
      print >> sys.stderr, "%s_%d"%(rname, int(altloc_resnum[i]))

    print >> sys.stderr, "The alternate coordinates have been discarded."
    print >> sys.stderr, "Only the first occurrence for each atom was kept."

  return(noaltlist)

#==================================================
def atom_wrap(recordlist):
#==================================================
# !!! this function should always be called !!!

# wraps 4-letter hydrogens
  wraplist = []

  for record in recordlist:
    if record[0] != 'ATOM  ' and record[0] != 'HETATM':
      wraplist.append(record)
      continue

# shifts 3-letter atoms if needed
    elif record[3][0] != ' ' and record[3][3] == ' ':
      atomname = record[3][3] + record[3][0:3]
      record[3] = atomname
      wraplist.append(record)
      continue

    else:
      wraplist.append(record)
      continue

  return(wraplist)

#========================================
def renumber(recordlist, filename):
#========================================
  table = open('%s_renum.txt'%filename, 'w')
  renumbered = []; current = -100; iatom = 1
  original = []; oriresname = []; final = []; finresname = []

  for record in recordlist:
    if not 'ATOM' in record[0] and not 'HETATM' in record[0]:
      renumbered.append(record)

    elif current == -100:
      actualnum = record[8]+record[9]
      actualname = record[5]
      if record[3] == ' CA ' or record[3] == 'CH3':
        original.append(record[8]+record[9])
        oriresname.append(record[5])

      record[8] = 1
      record[9] = ' '
      current = 1
      record[1] = iatom
      iatom = iatom + 1

      if record[3] == ' CA ' or record[3] == ' CH3':
        final.append(record[8])
        finresname.append(record[5])

      renumbered.append(record)

    elif record[8]+record[9] == actualnum and record[5] == actualname:

      if record[3] == ' CA ' or record[3] == ' CH3':
        original.append(record[8]+record[9])
        oriresname.append(record[5])

      record[8] = current
      record[9] = ' '
      record[1] = iatom
      iatom = iatom + 1

      if record[3] == ' CA ' or record[3] == ' CH3':
        final.append(record[8])
        finresname.append(record[5])
      renumbered.append(record)

    elif record[8]+record[9] != actualnum or record[5] != actualname :
      actualnum = record[8]+record[9]
      actualname = record[5]
      if record[3] == ' CA ' or record[3] == ' CH3':
        original.append(record[8]+record[9])
        oriresname.append(record[5])

      current = current + 1
      record[8] = current
      record[9] = ' '
      record[1] = iatom
      iatom = iatom + 1

      if record[3] == ' CA ' or record[3] == ' CH3':
        final.append(record[8])
        finresname.append(record[5])
      renumbered.append(record)


  for i in range(0, len(original)):
    table.write("%3s %5s    %3s %5s\n"%(oriresname[i], (original[i]), \
                                    finresname[i], (final[i])))

  return(renumbered)

#========================================
def non_standard(recordlist, filename):
#========================================
# define the common AA and less common AA names that make up proteins
# and that are recognized by Amber routines in ATOM (or HETATM) records
  RES = ('A', 'A3', 'A5', 'ACE', \
         'ALA', 'AN', 'ARG', 'ASH', \
         'ASN', 'ASP', 'Br-', 'C', \
         'C3', 'C5', 'CN', 'CYM', \
         'CYS', 'CYX', 'Cl-', 'Cs+', \
         'DA', 'DA3', 'DA5', 'DAN', \
         'DC', 'DC3', 'DC4', 'DC5', \
         'DCN', 'DG', 'DG3', 'DG5', \
         'DGN', 'DT', 'DT3', 'DT5', \
         'DTN', 'F-', 'G', 'G3', \
         'G5', 'GLH', 'GLN', 'GLU', \
         'GLY', 'GN', 'HID', 'HIE', \
         'HIP', 'HIS', 'HOH', 'HYP', \
         'I-', 'ILE', 'K+', 'LEU', \
         'LYN', 'LYS', 'Li+', 'MET', \
         'Mg+', 'NHE', 'NME', 'Na+', \
         'OHE', 'PHE', 'PL3', 'PRO', \
         'Rb+', 'SER', 'SPC', 'SPF', \
         'SPG', 'T4E', 'THR', 'TP3', \
         'TP4', 'TP5', 'TPF', 'TRP', \
         'TYR', 'U', 'U3', 'U5', \
         'UN', 'VAL', 'WAT', 'U5', \
         'UN', 'VAL', 'WAT')

  hetero = open(filename+'_nonprot.pdb', 'w')
  format = "%6s%5s%1s%4s%1s%3s%1s%1s%4s%1s%3s%8s%8s%8s%6s%6s%10s%2s%2s\n"
  ns_resname = []

  for record in recordlist:
    if record[5].strip() not in RES and record[5] != '   ':
      hetero.write(format % tuple(record))
      if record[5] not in ns_resname:
          ns_resname.append(record[5])

  if ns_resname:
    print >> sys.stderr, "\n---------- Non-Standard Residues"
    print >> sys.stderr, "The following non-standard residue names in the original PDB file"
    print >> sys.stderr, "are not recognized by Amber and have been written to the separate"
    print >> sys.stderr, "file %s_nonprot.pdb"%filename
    print >> sys.stderr, "\n".join(ns_resname)

  return(ns_resname)

#========================================
def non_standard_elbow(recordlist):
#========================================
# define the common AA and less common AA names that make up proteins
# and that are recognized by Amber routines in ATOM (or HETATM) records
  RES = ('A', 'A3', 'A5', 'ACE', \
         'ALA', 'AN', 'ARG', 'ASH', \
        'ASN', 'ASP', 'BA', 'BR', \
        'C', 'C3', 'C5', 'CA', \
        'CD', 'CL', 'CN', 'CO', \
        'CS', 'CU', 'CYM', 'CYS', \
        'CYX', 'DA', 'DA3', 'DA5', \
        'DAN', 'DC', 'DC3', 'DC4', \
        'DC5', 'DCN', 'DG', 'DG3', \
        'DG5', 'DGN', 'DT', 'DT3', \
        'DT5', 'DTN', 'EU', 'F', \
        'FE2', 'G', 'G3', 'G5', \
        'GLH', 'GLN', 'GLU', 'GLY', \
        'GN', 'HG', 'HID', 'HIE', \
        'HIP', 'HIS', 'HOH', 'HYP', \
        'ILE', 'IOD', 'K', 'LEU', \
        'LI', 'LYN', 'LYS', 'MET', \
        'MG', 'MN', 'NA', 'NHE', \
        'NI', 'NME', 'OHE', 'PB', \
        'PD', 'PHE', 'PL3', 'PRO', \
        'PT', 'RB', 'SER', 'SPC', \
        'SPF', 'SPG', 'SR', 'T4E', \
        'THR', 'TP3', 'TP4', 'TP5', \
        'TPF', 'TRP', 'TYR', 'U', \
        'U3', 'U5', 'UN', 'V2+', \
        'VAL', 'WAT', 'YB2', 'ZN')

  
  format = "%6s%5s%1s%4s%1s%3s%1s%1s%4s%1s%3s%8s%8s%8s%6s%6s%10s%2s%2s\n"
  ns_resname = []

  for record in recordlist:
    if record[5].strip() not in RES and record[5] != '   ':
      if record[5].strip() not in ns_resname:
        ns_resname.append(record[5].strip())
        try: f.close()
        except: pass  
        f=open('4antechamber_%s.pdb' %(record[5].strip()), 'w')
        resid=record[8]
        f.write(format % tuple(record))
      else: 
        if record[8]==resid:
          f.write(format % tuple(record))
        else:
          if not f.closed: f.close()  

  return ns_resname


#========================================
def find_his(recordlist):
#========================================
  nhis = 0; hisresname = []; hisresnum = []
  histidines = ('HIS', 'HID', 'HIP', 'HIN', 'HIE')
  for record in recordlist:
    if record[5] in histidines and record[3] == ' CA ':
      hisresname.append(record[5])
      hisresnum.append(record[8])
      nhis += 1

  if nhis > 0:
    print >> sys.stderr, "\n---------- Histidines (Renumbered Residues!)"
    print >> sys.stderr, "The following %d histidines are found in the PDB file: "%nhis

    for i, rname in enumerate(hisresname):
      print >> sys.stderr, '%s_%d' % (rname, int(hisresnum[i]))

    print >> sys.stderr, "If HIS, Amber will consider them as HIE (epsilon-HIS) by default."
    print >> sys.stderr, "You might need to check their tautomerism or protonation state"
    print >> sys.stderr, "and change them to HID (delta-HIS) or HIP (protonated HIS)"

  return(recordlist)

#========================================
def constph(recordlist):
#========================================
  print >> sys.stderr, "\n---------- Constant pH Simulation"
  as4, gl4, hip = [],[],[]
  for record in recordlist:
    if record[5] == 'ASP':
      record[5] = 'AS4'
      as4.append(record[8])
    elif record[5] == 'GLU':
      record[5] = 'GL4'
      gl4.append(record[8])
    elif record[5] == 'HIS':
      record[5] = 'HIP'
      hip.append(record[8])
    else:
      continue

  print >> sys.stderr, "ASP --> AS4: %d" %len(set(as4))
  print >> sys.stderr, "GLU --> GL4: %d" %len(set(gl4))
  print >> sys.stderr, "HIS --> HIP: %d" %len(set(hip))

  return recordlist

#========================================
def find_disulfide(recordlist, filename):
#========================================
  cys_residues = [];  cys_sgx = []; cys_sgy = []; cys_sgz = []
  cyx_residues = [];  cys_sqn = []; ncys = 0; ncyx = 0

  print >> sys.stderr, "\n---------- Cysteines in Disulfide Bonds (Renumbered Residues!)"
  for record in recordlist:

    if 'SG' in record[3] and ('CYS' in record[5] or 'CYX' in record[5]):
      cys_residues.append(record[8])
      cys_sgx.append(record[11])
      cys_sgy.append(record[12])
      cys_sgz.append(record[13])
      cys_sqn.append(record[1])
      ncys += 1
  
  cnct=''
  if ncys > 0:
      
    sslink = open('%s_sslink'%filename, 'w')
    dist = [[0 for i in range(ncys)] for j in range(ncys)]
    for i in range(0, ncys-1):
      for j in range(i+1, ncys):
        dx = float(cys_sgx[i]) - float(cys_sgx[j])
        dx2 = dx*dx
        dy = float(cys_sgy[i]) - float(cys_sgy[j])
        dy2 = dy*dy
        dz = float(cys_sgz[i]) - float(cys_sgz[j])
        dz2 = dz*dz
        dist[i][j] = sqrt(dx2 +dy2 +dz2)
        if dist[i][j] < 2.5 and dist[i][j] > 0.1:
          cyx_residues.append(cys_residues[i])
          cyx_residues.append(cys_residues[j])
          print >> sys.stderr,("CYS_%s - CYS_%s: S-S distance = %f Ang."%(cys_residues[i],
                 cys_residues[j], dist[i][j]))
          sslink.write('%s %s\n'%(cys_residues[i], cys_residues[j]))
          ncyx += 1
          cnct += 'CONECT%5d%5d\n' %(cys_sqn[i],cys_sqn[j])

# rename the CYS to CYX for disulfide-involved cysteines
    for record in recordlist:
      if record[8] in cyx_residues:
        record[5] = 'CYX'
      else:
        continue
  if ncyx:
    print >> sys.stderr, "The above CYS have been renamed to CYX in the new PDB file."
    print >> sys.stderr, "Disulfide bond CONECT cards have been added to the new PDB file."

  else:
    print >> sys.stderr, "No disulfide bonds have been detected."
  return(recordlist,cnct)

#========================================
def find_gaps(recordlist):
#========================================
  global RESPROT
  ca_atoms = []; ca_resnum = []; ca_resname = [];
  ca_x = []; ca_y = []; ca_z = []
  gaplist = [];

  for record in recordlist:
    if ('CA' in record[3] or 'CH3' in record[3]) and record[5] in RESPROT:
      ca_atoms.append(record[1])
      ca_resnum.append(record[8])
      ca_resname.append(record[5])
      ca_x.append(float(record[11]))
      ca_y.append(float(record[12]))
      ca_z.append(float(record[13]))

  nca = len(ca_atoms)
  ngaps = 0

  for i in range(nca-1):
    dx = ca_x[i] - ca_x[i+1]
    dx2 = dx*dx
    dy = ca_y[i] - ca_y[i+1]
    dy2 = dy*dy
    dz = ca_z[i] - ca_z[i+1]
    dz2 = dz*dz
    gap = sqrt(dx2 +dy2 +dz2)

    if gap > 5.0:
      gaprecord = (gap, ca_resname[i], int(ca_resnum[i]), ca_resname[i+1],\
                   int(ca_resnum[i+1]))
      gaplist.append(gaprecord)
      ngaps += 1

  if ngaps > 0:
    print >> sys.stderr, "\n---------- Gaps (Renumbered Residues!)"
    format = "gap of %lf A between %s_%d and %s_%d"

    for i, gaprecord in enumerate(gaplist):
      print >> sys.stderr, (format % tuple(gaprecord))

    print >> sys.stderr, "You MUST (!!!) insert a TER record between the residues listed above and"
    print >> sys.stderr, "consider to introduce caps (ACE and NME) at the dangling N- and C-terminals."

  return()

#========================================
def find_incomplete(recordlist):
#========================================
# finds residues with missing heavy atoms in the following list of residues;
# dictionary with number of heavy atoms:
  #PAJ TODO:complete this list with nucleic acids
  HEAVY = {'ALA':5,  'ARG':11, 'ASN':8,  'ASP':8, \
           'CYS':6,  'GLN':9,  'GLU':9,  'GLY':4, \
           'HIS':10, 'ILE':8,  'LEU':8,  'LYS':9, \
           'MET':8,  'PHE':11, 'PRO':7,  'SER':6, \
           'THR':7,  'TRP':14, 'TYR':12, 'VAL':7, \
           'HID':10, 'HIE':10, 'HIN':10, 'HIP':10, \
           'CYX':6,  'ASH':8,  'GLH':9,  'LYH':9}
  print >> sys.stderr, '\n---------- Missing Heavy Atoms (Renumbered Residues!)' 
  resnum = []
  resname = []
  resheavy = []
  flag = 0
  nheavy = 1
  length = len(recordlist)
  for i, record in enumerate(recordlist):
    if i == length-1:
      for j, res in enumerate(resnum):
        missing = HEAVY[resname[j]] - resheavy[j]
        if missing > 0:
          flag = 1
          print >> sys.stderr, "%s_%s misses %d heavy atom(s)"%(resname[j], res, missing) 
      if flag == 0:
        print >> sys.stderr, "None"
      return()
    if not HEAVY.has_key(record[5]):
      continue
    if recordlist[i+1][8] == record[8]:
      nheavy += 1
    else:
      resnum.append(record[8])
      resname.append(record[5])
      resheavy.append(nheavy)
      nheavy = 1
      continue
  return()

def run(arg_pdbout, arg_pdbin, 
        arg_nohyd = True, 
        arg_dry   = False, 
        arg_prot  = False, 
        arg_noter = False,
        arg_constph = False,
        arg_elbow = False):
  filename, extension = os.path.splitext(arg_pdbout)
  pdbin = arg_pdbin
  recordlist = pdb_read(pdbin, arg_noter)
  
  # wrap all atom names to pure standard (always):======================
  recordlist = atom_wrap(recordlist)
  # remove alternate locations and keep only the first one:=============
  recordlist = remove_altloc(recordlist)
  # remove hydrogens if option -y is used:==============================
  if arg_nohyd:
    recordlist = remove_hydrogens(recordlist)
  # find non-standard Amber residues:===================================
  non_standard(recordlist, filename)
  ns_names = None
  if arg_elbow:
    ns_names=non_standard_elbow(recordlist)
  # keep only protein:==================================================
  if arg_prot:
    recordlist = prot_only(recordlist, filename)
  # remove water if -d option used:=====================================
  if arg_dry:
    recordlist = remove_water(recordlist, filename)
  # renumber atoms and residues:========================================
  recordlist = renumber(recordlist, filename)
  # after this call, residue numbers refer to the ***new*** PDB file
  #=====================================================================
  # find histidines that might have to be changed
  if arg_constph:
    recordlist = constph(recordlist)
  else:
    recordlist = find_his(recordlist)
  # find possible S-S in the final protein:=============================
  recordlist, cnct = find_disulfide(recordlist, filename)
  # find possible gaps:==================================================
  find_gaps(recordlist)
  # count heavy atoms
  find_incomplete(recordlist)
  # =====================================================================
  # make final output to new PDB file
  pdb_write(recordlist, arg_pdbout, cnct)
  print >> sys.stderr, ""
  return ns_names
    
#========================================main===========================
if __name__ ==  "__main__":
  parser = OptionParser(version=__version__)
  parser.add_option("-i","--in", metavar = "FILE", dest = "pdbin",
                    help = "PDB input file                      (default: stdin)",
                    default='stdin')
  parser.add_option("-o","--out", metavar = "FILE", dest = "pdbout",
                    help = "PDB output file                     (default: stdout)",
                    default='stdout')
  parser.add_option("-y","--nohyd", action = "store_true", dest = "nohyd",
                    help = "remove all hydrogen atoms           (default: no)")
  parser.add_option("-d","--dry", action = "store_true", dest = "dry",
                    help = "remove all water molecules          (default: no)")
  parser.add_option("-p", "--prot", action = "store_true", dest = "prot",
                    help = "keep only Amber-compatible residues (default: no)")
  parser.add_option("--noter", action =  "store_true", dest = "noter",
                    help = "remove TER, MODEL, ENDMDL cards     (default: no)")
  parser.add_option("--constantph", action = "store_true", dest = "constantph",
                    help = "rename GLU,ASP,HIS for constant pH simulation")
  (opt, args) = parser.parse_args()

  if opt.pdbin == opt.pdbout:
    print >> sys.stderr, "The input and output file names cannot be the same!"

  # Make sure that if we are reading from stdin it's being directed from a pipe
  # or a file. We don't want to wait for user input that will never come.

  if opt.pdbin == 'stdin':
    if os.isatty(sys.stdin.fileno()):
      sys.exit(parser.print_help() or 1)

  run(opt.pdbout, opt.pdbin, opt.nohyd, opt.dry, opt.prot, opt.noter, 
      opt.constantph)