#!/usr/bin/env python
#======================================================================
# pymdgbsa.py version 0.7 (May 2011)
# copyright Novartis Institutes for Biomedical Research
# Basel, Switzerland
# Author: Romain M. Wolf
#======================================================================
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#=======================================================================
import os, re, sys, math, tempfile, shutil
from subprocess import Popen, PIPE
from optparse import OptionParser
whitespace = re.compile(r'\s+')
"""
Usage: pymdpbsa.py [options] or --help for HELP
Computes the free energy of interaction terms for a receptor-ligand
complex, given an single MD trajectory (ptraj-readable) of the complex
and the individual parameter-topology files for the complex,
the ligand, and the receptor.
This version calls the pbsa routine of AMBER Tools for PB, i.e.,
it will not work with AMBER Tools 1.2 or earlier...
There are five different options for the solvation term:
3 Generalized-Born methods (corresponding to igb = 1, 2, 5 in other
AMBER modules) with a simple linear function of the SASA for the
nonpolar part Gnp = 0.0072 * SASA;
2 Poisson-Boltzmann methods with different modes to treat
non-polar solvation terms:
(a) a standard Gnp = 0.005 * SASA + 0.86 (fast);
(b) the Ecav/Edisp split according to Luo et al. (slower);
When no solvation term is specified, a distance-dependent dielectric
function with epsilon = 1 is used.
Entropy terms are NOT evaluated.
******************************************************************
NOTE: Use the PB options 3 or 4 with care, look at the
pbsacontrol_solv3 and pbsacontrol_solv4 functions below and adjust
the settings to your taste...
******************************************************************
"""
#function definitions
#======================================================================
def nrgtable_gb(prmtop, project, P, start, stop, step, gb, sa):
#======================================================================
"""
Makes the actual energy tables by calling the NAB routine ffgbsa
* prmtop = prmtop file of the part for which the energy is to be computed
(ligand or receptor or complex)
* project = global project name (used to identify the tables)
* P = flag to define which part is computed:
L for ligand alone
R for receptor alone
C (or anything else) for entire complex
* start, stop, and step are the portions of the trajectory
to be used in the evaluation
NOTE: THESE VALUES MUST BE THE SAME AS IN THE TRAJECTORY SPLIT.
* gb = flag to include Generalized-Born (1, 2, or 5) or not (0)
no GB means distance-dependent dielectrics epsilon=r
* sa = flag to include the SASA term (always 1, i.e., set, in this version)
!!! Calls ffgbsa with runs the NAB routine ffgbsa !!!
"""
if (P == "L"):
middle = "L"
elif(P == "R"):
middle = "R"
else:
middle = "C"
table = open("%s.%s.nrg"%(project, middle), "w")
if start < 1:
start = 1
for x in range(start, stop+step, step):
# read the (temporary) PDB files of the required frames, one after the other
pdbfile = "%s.%s.pdb.%d"%(project, middle, x)
# compute the energy terms by calling ffgbsa
etot, ebat, evdw, ecoul, egb, sasa = ffgbsa(prmtop, pdbfile, gb, sa)
# write the values to the selected .nrg table
table.write("%4i %10.2f %10.2f %10.2f %10.2f %10.2f %10.2f\n"
%(x, etot, ebat, evdw, ecoul, egb, sasa))
table.flush()
table.close()
#======================================================================
def nrgtable_pb(prmtop, project, P, start, stop, step):
#======================================================================
"""
makes the actual energy tables by calling pbsarun
* prmtop = prmtop file of the part for which the energy is to be computed
(ligand or receptor or complex)
* project = global project name (used to identify the tables)
* P = flag to define which part is computed:
L for ligand alone
R for receptor alone
C (or anything else) for entire complex
* start, stop, and step are the portions of the trajectory
to be used in the evaluation
NOTE: THESE VALUES MUST BE THE SAME AS IN THE TRAJECTORY SPLIT.
"""
if (P == "L"):
middle = "L"
elif(P == "R"):
middle = "R"
else:
middle = "C"
table = open("%s.%s.nrg"%(project, middle), "w")
if start < 1:
start = 1
for x in range(start, stop+step, step):
# read the (temporary) CRD files of the required frames, one after the other
crdfile = "%s.%s.crd.%d"%(project, middle, x)
# compute the energy terms by calling pbsa
etot, evdw, ecoul, epb, ecav, edisp = pbsarun(prmtop, crdfile)
# write the values to the selected .nrg table
table.write("%4i %10.2f %10.2f %10.2f %10.2f %10.2f %10.2f\n"
%(x, etot, evdw, ecoul, epb, ecav, edisp))
table.flush()
table.close()
#======================================================================
def difftable_gb(project, start, step):
#======================================================================
"""
Generates the interaction energy table *.D.nrg.
Reads the three independent *.nrg tables for the ligand, the
receptor, and the complex.
Returns nothing, but generates the global file
'project.D.nrg'
"""
fvdw = 1.00 # weight for vdW term in final difference
# this value might become a changeable option in future releases
lignrg = open("%s.L.nrg"%project, 'r');
recnrg = open("%s.R.nrg"%project, 'r');
comnrg = open("%s.C.nrg"%project, 'r');
diffnrg = open("%s.D.nrg"%project, 'w');
current = start-step
for ligcurrent in lignrg.readlines():
reccurrent = recnrg.readline()
comcurrent = comnrg.readline()
lvalues = whitespace.split(ligcurrent.strip())
rvalues = whitespace.split(reccurrent.strip())
cvalues = whitespace.split(comcurrent.strip())
#here we reject diffs where sasa is zero in any of the nrg lines
if float(cvalues[6]) == 0. or float(rvalues[6]) == 0. or float(lvalues[6]) == 0.:
continue
dbat = float(cvalues[2])-float(rvalues[2])-float(lvalues[2]) #MM energy
dvdw = (float(cvalues[3])-float(rvalues[3])-float(lvalues[3]))*fvdw #vdW
dcoul = float(cvalues[4])-float(rvalues[4])-float(lvalues[4]) #Coulomb
dgb = float(cvalues[5])-float(rvalues[5])-float(lvalues[5]) #GB
dsasa = float(cvalues[6])-float(rvalues[6])-float(lvalues[6]) #SASA
dtot = dbat+dvdw+dcoul+dgb+dsasa #total
current+= step
diffnrg.write("%4i %10.2f %10.2f %10.2f %10.2f %10.2f %10.2f\n"
%(current, dtot, dbat, dvdw, dcoul, dgb, dsasa))
#======================================================================
def difftable_pb(project, start, step):
#======================================================================
"""
generates the interaction energy table *.D.nrg;
reads the three independent *.nrg tables for the ligand, the
receptor, and the complex;
returns nothing, but generates the global file
'project.D.nrg';
"""
lignrg = open("%s.L.nrg"%project, 'r');
recnrg = open("%s.R.nrg"%project, 'r');
comnrg = open("%s.C.nrg"%project, 'r');
diffnrg = open("%s.D.nrg"%project, 'w');
current = start-step
for ligcurrent in lignrg.readlines():
reccurrent = recnrg.readline()
comcurrent = comnrg.readline()
lvalues = whitespace.split(ligcurrent.strip())
rvalues = whitespace.split(reccurrent.strip())
cvalues = whitespace.split(comcurrent.strip())
dtot = float(cvalues[1])-float(rvalues[1])-float(lvalues[1])
dvdw = float(cvalues[2])-float(rvalues[2])-float(lvalues[2])
dcoul = float(cvalues[3])-float(rvalues[3])-float(lvalues[3])
dpb = float(cvalues[4])-float(rvalues[4])-float(lvalues[4])
dcav = float(cvalues[5])-float(rvalues[5])-float(lvalues[5])
ddisp = float(cvalues[6])-float(rvalues[6])-float(lvalues[6])
current+= step
diffnrg.write("%4i %10.2f %10.2f %10.2f %10.2f %10.2f %10.2f\n"
%(current, dtot, dvdw, dcoul, dpb, dcav, ddisp))
#======================================================================
def ffgbsa(prmtop, pdbfile, gb, sa):
#======================================================================
"""Computes force field terms, Generalized Born, and SASA
Calls ffgbsa, a NAB application written for this purpose
* prmtop = AMBER prmtop file for the molecular system;
* pdbfile = PDB file for the molecular system;
* gb = flag for Generalized Born: 1, 2, 5 for yes, else no
(if gb not 1, 2, 5, i.e., is not used, a distance-dependent
function eps = r is used instead)
* sa is a flag to compute the solvent-accessible surface,
in the current version, it MUST always be set to 1.
Returns energy components in tuple with 6 values in the order
etot (total energy)
ebat (sum of bond, angle, torsion terms)
evdw (the van der Waals energy)
ecoul (the Coulomb term)
egb (the Generalized-Born term, if asked for...)
sasa (the SASA cavity energy term if molsurf works correctly...)
NOTE 1:
The molsurf function in NAB usually works fine.
In the rare case of a problem, SA is set to zero for the problematic
frame, a warning is emitted, and later the frame is excluded from the
statistical evaluations
NOTE 2:
Atom radii used for molsurf are set in accordance with the radii used
in Holger Gohlke's MMPB/SA Perl scripts.
All radii are then incremented by 1.4 Angstroem and the surface is computed
with a probe of radius zero.
NOTE 3:
We currently use a surface tension term of 0.0072 to convert the SASA into kcal/mol.
"""
# surface tension
fsurf = 0.0072
gibbs = Popen(["ffgbsa", pdbfile, prmtop, gb, sa], stdout = PIPE)
gibbs.wait()
lines = gibbs.stdout.readlines()
# change in december 2009
# since the output of ffgbsa can vary in length, the relevant line numbers can vary,
# hence we scan for the first field in each line to make sure that the correct record is read
for line in lines:
energies = whitespace.split(line)
if energies[0] == "ff:":
etot = float(energies[2])
ebat = float(energies[3])
evdw = float(energies[4])
ecoul = float(energies[5])
egb = float(energies[7])
# check if sasa worked and if yes, compute energy contribution
# note that sasa = 0.00 will exclude this frame from evaluation of
# averages later
sasa = 0.00
for line in lines:
energies = whitespace.split(line)
if energies[0] == "sasa:":
sasa = float(energies[1]) * fsurf
# notice to the screen that some problems occured in molsurf
if sasa == 0.00:
print("\nfailure in molsurf")
# total energy is the etot term from the energy call plus the sasa energy
etot = etot + sasa
return etot, ebat, evdw, ecoul, egb, sasa
#======================================================================
def pbsarun(prmtop, crdfile):
#======================================================================
"""
runs pbsa on the specified prmtop and crd file pair,
uses temporary 'pbsasfe.in' command file,
returns the energy components fished from the temporary
output file 'pbsasfe.out'
"""
cmdline = "pbsa -O -i pbsasfe.in -o pbsasfe.out -p %s -c %s"%(prmtop, crdfile)
os.system(cmdline)
result = open('pbsasfe.out', 'r')
lines = result.readlines()
i=-1
for line in lines:
i=i+1
energies = whitespace.split(line)
if energies[1] == "FINAL":
etot = float(whitespace.split(lines[i+4])[3])
evdw = float(whitespace.split(lines[i+6])[11])
ecoul = float(whitespace.split(lines[i+7])[3])
epb = float(whitespace.split(lines[i+7])[6])
ecav = float(whitespace.split(lines[i+8])[2])
edisp = float(whitespace.split(lines[i+8])[5])
return etot, evdw, ecoul, epb, ecav, edisp
#======================================================================
def pbsacontrol_solv3():
#======================================================================
# for opt.solv = 3
"""
writes the pbsa control file for full PBSA energy terms
into a file 'pbsasfe.in'
see pbsa documentation for details...
simple SASA with Esasa = 0.005*SASA+0.86
NOTE: users who want to change PBSA settings should
do so in the lines below...
"""
cmd = open('pbsasfe.in', 'w')
cmd.write('''PB calculation (SASA only)
&cntrl
ntx=1, imin=1, igb=10, inp=1,
/
&pb
epsout=80.0, epsin=1.0, space=0.5, bcopt=6, dprob=1.4,
cutnb=0, eneopt=2,
accept=0.001, sprob=1.6, radiopt=0, fillratio=4,
maxitn=1000, arcres=0.0625,
cavity_surften=0.005, cavity_offset=0.86
/
''')
cmd.close()
#======================================================================
def pbsacontrol_solv4():
#======================================================================
# for opt.solv = 4
"""
writes the pbsa control file for full PBSA energy terms
into a file 'pbsasfe.in'
see pbsa documentation for details...
uses settings for SAV from Table 3 in Luo paper...
identical as settings for solvation free energy...
"""
cmd = open('pbsasfe.in', 'w')
cmd.write('''PB calculation SAV/DISP scheme
&cntrl
ntx=1, imin=1, igb=10, inp=2
/
&pb
npbverb=0, istrng=0.0, epsout=80.0, epsin=1.0,
radiopt=1, dprob=1.6,
space=0.5, nbuffer=0, fillratio=4.0,
accept=0.001, arcres=0.0625,
cutnb=0, eneopt=2,
decompopt=2, use_rmin=1, sprob=0.557, vprob=1.300,
rhow_effect=1.129, use_sav=1,
cavity_surften=0.0378, cavity_offset=-0.5692
/
''')
cmd.close()
#======================================================================
def stats_gb(project, P):
#======================================================================
"""
Generates statistics (average, standard deviation, and standard error)
NEW (September 2009) Takes care of excluding frames for which molsurf failed
"""
if (P == "L"):
middle = "L"
elif(P == "R"):
middle = "R"
elif(P == "D"):
middle = "D"
else:
middle = "C"
table = open("%s.%s.nrg"%(project, middle), "r")
lines = table.readlines()
records = len(lines)
current = 0
skip = 0
tot = 0.00
bat = 0.00
vdw = 0.00
coul = 0.00
gb = 0.00
sasa = 0.00
# averages
while(current opt.stop:
opt.start = opt.stop
if opt.stop < opt.start:
opt.stop = opt.start
if (opt.stop-opt.start)%opt.step != 0:
opt.stop = opt.stop - (opt.stop-opt.start)%opt.step
else: pass
gbflag = 0
pbflag = 0
# decide of GB or PB, note the gb must be passed as string here!
if opt.solv < 1 or opt.solv > 8 or opt.solv == 6:
gb = "0"
gbflag = 1
elif opt.solv in (1, 2, 5, 7, 8):
gb = "%s"%opt.solv
gbflag = 1
else:
pb = opt.solv
pbflag = 1
# prepare the correct pbsa command file if PB is chosen
if opt.solv == 3:
pbsacontrol_solv3()
elif opt.solv == 4:
pbsacontrol_solv4()
else: pass
# function calls: split trajectories, generate energy tables,
# make statistics and write out to the summary file
# ligand alone ------
if gbflag == 1:
split2pdb(cprm, traj, opt.project, striplig, "L",
opt.start, opt.stop, opt.step)
nrgtable_gb(lprm, opt.project , "L",
opt.start, opt.stop, opt.step, gb, sa)
else:
split2crd(cprm, traj, opt.project, striplig, "L",
opt.start, opt.stop, opt.step)
nrgtable_pb(lprm, opt.project , "L",
opt.start, opt.stop, opt.step)
# receptor alone -----
if gbflag == 1:
split2pdb(cprm, traj, opt.project, striprec, "R",
opt.start, opt.stop, opt.step)
nrgtable_gb(rprm, opt.project, "R",
opt.start, opt.stop, opt.step, gb, sa)
else:
split2crd(cprm, traj, opt.project, striprec, "R",
opt.start, opt.stop, opt.step)
nrgtable_pb(rprm, opt.project, "R",
opt.start, opt.stop, opt.step)
# complex -----
if gbflag == 1:
split2pdb(cprm, traj, opt.project, ":ZZZ", "C",
opt.start, opt.stop, opt.step)
nrgtable_gb(cprm, opt.project, "C",
opt.start, opt.stop, opt.step, gb, sa)
else:
split2crd(cprm, traj, opt.project, ":ZZZ", "C",
opt.start, opt.stop, opt.step)
nrgtable_pb(cprm, opt.project, "C",
opt.start, opt.stop, opt.step)
# interaction energy -----
if gbflag == 1:
difftable_gb(opt.project, opt.start, opt.step)
else:
difftable_pb(opt.project, opt.start, opt.step)
# make statistics and generate summary output file
if gbflag == 1:
summary_gb(opt)
else:
summary_pb(opt)
# copy files to keep to the parent directory
shutil.copy('%s.sum'%opt.project, '../%s.sum'%opt.project)
for mid in ['L', 'R', 'C', 'D']:
shutil.copy('%s.%s.nrg'%(opt.project,mid), '../%s.%s.nrg'%(opt.project,mid))
# remove the temporary directory if --clean option was specified
os.chdir('../')
if(opt.clean):
shutil.rmtree(tmpdir)
else:
sys.exit(0)