""" This module provides a means for users to take advantage of MMPBSA.py's parsing ability. It exposes the free energy data (optionally to numpy arrays) so that users can write a simple script to carry out custom data analyses, leveraging the full power of Python's extensions, if they want (e.g., numpy, scipy, etc.) GPL LICENSE INFO Copyright (C) 2010 Jason Swails, Bill Miller III, and Dwight McGee This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ from __future__ import division from copy import deepcopy from MMPBSA_mods import infofile, main, amber_outputs from MMPBSA_mods.exceptions import SetupError, NoFileExists from MMPBSA_mods.fake_mpi import MPI import os import warnings # Try importing numpy to see if we have it available try: import numpy as np HAS_NUMPY = True except ImportError: from array import array HAS_NUMPY = False __all__ = ['load_mmpbsa_info'] # Abstract the array-making routines if HAS_NUMPY: make_array = lambda x: np.fromiter(x, float) make_array_len = lambda x: np.zeros(x, float) else: make_array = lambda x: array('d', x) make_array_len = lambda x: array('d', [0 for i in range(x)]) class mmpbsa_data(dict): """ Main class that holds all of the Free Energy data """ def __init__(self, app): """ Load data from an info object """ global HAS_NUMPY if not isinstance(app, main.MMPBSA_App): raise TypeError('mmpbsa_data can only take an MMPBSA_App!') # Loop through all of the data if not hasattr(app, 'calc_types'): raise SetupError('Output files have not yet been parsed!') # Now load the data into the dict has_mutant = False # See if we are doing stability self.stability = app.stability # Now load the data for key in app.calc_types: if key == 'mutant': has_mutant = True continue self[key] = {} tmpdict = {} for dkey in app.calc_types[key]['complex'].data: tmpdict[dkey] = make_array(app.calc_types[key]['complex'].data[dkey]) self[key]['complex'] = tmpdict if not self.stability: tmpdict = {} for dkey in app.calc_types[key]['receptor'].data: tmpdict[dkey] = make_array( app.calc_types[key]['receptor'].data[dkey]) self[key]['receptor'] = tmpdict tmpdict = {} for dkey in app.calc_types[key]['ligand'].data: tmpdict[dkey] = make_array( app.calc_types[key]['ligand'].data[dkey]) self[key]['ligand'] = tmpdict # Are we doing a mutant? if has_mutant: self.mutant = {} for key in app.calc_types['mutant']: self.mutant[key] = {} tmpdict = {} for dkey in app.calc_types['mutant'][key]['complex'].data: tmpdict[dkey] = make_array( app.calc_types['mutant'][key]['complex'].data[dkey]) self.mutant[key]['complex'] = tmpdict if not self.stability: self.mutant[key]['receptor'] = {} tmpdict = {} for dkey in app.calc_types['mutant'][key]['receptor'].data: tmpdict[dkey] = make_array( app.calc_types['mutant'][key]['receptor'].data[dkey]) self.mutant[key]['receptor'] = tmpdict tmpdict = {} for dkey in app.calc_types['mutant'][key]['ligand'].data: tmpdict[dkey] = make_array( app.calc_types['mutant'][key]['ligand'].data[dkey]) self.mutant[key]['ligand'] = tmpdict def __iadd__(self, other): """ Adding one to another extends every array. The way we do this depends on whether we're using numpy arrays or """ global HAS_NUMPY if HAS_NUMPY: return self._add_numpy(other) else: return self._add_nonumpy(other) def _add_numpy(self, other): """ If we have numpy available, we need to extend every array in a numpy-valid way """ used_keys = [] for key in self: used_keys.append(key) try: for dkey in self[key]['complex']: _combine_np_arrays(self[key]['complex'][dkey], other[key]['complex'][dkey]) for dkey in self[key]['receptor']: _combine_np_arrays(self[key]['receptor'][dkey], other[key]['receptor'][dkey]) for dkey in self[key]['ligand']: _combine_np_arrays(self[key]['ligand'][dkey], other[key]['ligand'][dkey]) except KeyError: pass for key in other: if key in used_keys: continue # If we didn't have a particular calc type, copy that array in here self[key] = deepcopy(other[key]) # Check mutant statuses. If the other has mutant and I don't, copy other # If we both have mutant, combine. If only I do, already done if self.mutant and not other.mutant: self.mutant = deepcopy(other.mutant) elif self.mutant and other.mutant: used_keys_mutant = [] for key in self.mutant: used_keys_mutant.append(key) try: for dkey in self[key]['complex']: _combine_np_arrays(self[key]['complex'][dkey], other[key]['complex'][dkey]) for dkey in self[key]['receptor']: _combine_np_arrays(self[key]['receptor'][dkey], other[key]['receptor'][dkey]) for dkey in self[key]['ligand']: _combine_np_arrays(self[key]['ligand'][dkey], other[key]['ligand'][dkey]) except KeyError: pass for key in other.mutant: if key in used_keys_mutant: continue self.mutant[key] = deepcopy(other.mutant[key]) def _add_nonumpy(self, other): """ Adds up 2 array objects (just use 'extend' method) """ used_keys = [] for key in self: used_keys.append(key) for dkey in self[key]: try: self[key]['complex'][dkey].extend(other[key]['complex'][dkey]) self[key]['receptor'][dkey].extend(other[key]['receptor'][dkey]) self[key]['ligand'][dkey].extend(other[key]['ligand'][dkey]) except KeyError: pass for key in other: if key in used_keys: continue # If we didn't have a particular calc type, copy that array in here self[key] = deepcopy(other[key]) # Check mutant statuses. If the other has mutant and I don't, copy other # If we both have mutant, combine. If only I do, already done if self.mutant and not other.mutant: self.mutant = deepcopy(other.mutant) elif self.mutant and other.mutant: used_keys_mutant = [] for key in self.mutant: used_keys_mutant.append(key) for dkey in self.mutant[key]: try: self.mutant[key]['complex'].extend( other.mutant[key]['complex']) self.mutant[key]['receptor'].extend( other.mutant[key]['receptor']) self.mutant[key]['ligand'].extend( other.mutant[key]['ligand']) except KeyError: pass for key in other.mutant: if key in used_keys_mutant: continue self.mutant[key] = deepcopy(other.mutant[key]) def _combine_np_arrays(nparray1, nparray2): origsize = nparray1.shape[0] nparray1.resize(origsize + nparray2.shape[0]) for i in range(nparray2.shape[0]): nparray1[origsize + i] = nparray2[i] class APIDecompOut(amber_outputs.DecompOut): def __init__(self, basename, surften, num_files, verbose, nframes): amber_outputs.DecompOut.__init__(self, basename, None, surften, False, num_files, verbose) self.array_data = {} # Make a new dict for all printed tokens (TDC,SDC,BDC) for key in self.allowed_tokens: self.array_data[key] = {} for i in range(nframes): for j in range(self.num_terms): rnum, internal, vdw, eel, pol, sas, tot = self.get_next_term(key) try: self.array_data[key][rnum]['int'][i] = internal self.array_data[key][rnum]['vdw'][i] = vdw self.array_data[key][rnum]['eel'][i] = eel self.array_data[key][rnum]['pol'][i] = pol self.array_data[key][rnum]['sas'][i] = sas self.array_data[key][rnum]['tot'][i] = tot except KeyError: # This is the first frame, we don't have the rnum dict yet, so # make that dict here and create the arrays, then fill the # first term self.array_data[key][rnum] = {} for k in ('int', 'vdw', 'eel', 'pol', 'sas', 'tot'): self.array_data[key][rnum][k] = make_array_len(nframes) self.array_data[key][rnum]['int'][i] = internal self.array_data[key][rnum]['vdw'][i] = vdw self.array_data[key][rnum]['eel'][i] = eel self.array_data[key][rnum]['pol'][i] = pol self.array_data[key][rnum]['sas'][i] = sas self.array_data[key][rnum]['tot'][i] = tot class APIDecompOut(amber_outputs.DecompOut): def __init__(self, basename, surften, num_files, verbose, nframes): amber_outputs.DecompOut.__init__(self, basename, None, surften, False, num_files, verbose) self.array_data = {} # Make a new dict for all printed tokens (TDC,SDC,BDC) for key in self.allowed_tokens: self.array_data[key] = {} for i in range(nframes): for key in self.allowed_tokens: for j in range(self.num_terms): rnum, internal, vdw, eel, pol, sas, tot = self.get_next_term(key) try: self.array_data[key][rnum]['int'][i] = internal self.array_data[key][rnum]['vdw'][i] = vdw self.array_data[key][rnum]['eel'][i] = eel self.array_data[key][rnum]['pol'][i] = pol self.array_data[key][rnum]['sas'][i] = sas self.array_data[key][rnum]['tot'][i] = tot except KeyError: # This is the first frame, we don't have the rnum dict yet, so # make that dict here and create the arrays, then fill the # first term self.array_data[key][rnum] = {} for k in ('int', 'vdw', 'eel', 'pol', 'sas', 'tot'): self.array_data[key][rnum][k] = make_array_len(nframes) self.array_data[key][rnum]['int'][i] = internal self.array_data[key][rnum]['vdw'][i] = vdw self.array_data[key][rnum]['eel'][i] = eel self.array_data[key][rnum]['pol'][i] = pol self.array_data[key][rnum]['sas'][i] = sas self.array_data[key][rnum]['tot'][i] = tot class APIPairDecompOut(amber_outputs.PairDecompOut): def __init__(self, basename, surften, num_files, verbose, nframes): amber_outputs.DecompOut.__init__(self, basename, None, surften, False, num_files, verbose) self.array_data = {} # Make a new dict for all printed tokens (TDC,SDC,BDC) for key in self.allowed_tokens: self.array_data[key] = {} for i in range(nframes): for key in self.allowed_tokens: for j in range(self.num_terms): rnum, rnum2, internal, vdw, eel, pol, sas, tot = \ self.get_next_term(key) dkey = '%d-%d' % (rnum, rnum2) try: self.array_data[key][dkey]['int'][i] = internal self.array_data[key][dkey]['vdw'][i] = vdw self.array_data[key][dkey]['eel'][i] = eel self.array_data[key][dkey]['pol'][i] = pol self.array_data[key][dkey]['sas'][i] = sas self.array_data[key][dkey]['tot'][i] = tot except KeyError: # This is the first frame, we don't have the rnum dict yet, so # make that dict here and create the arrays, then fill the # first term self.array_data[key][dkey] = {} for k in ('int', 'vdw', 'eel', 'pol', 'sas', 'tot'): self.array_data[key][dkey][k] = make_array_len(nframes) self.array_data[key][dkey]['int'][i] = internal self.array_data[key][dkey]['vdw'][i] = vdw self.array_data[key][dkey]['eel'][i] = eel self.array_data[key][dkey]['pol'][i] = pol self.array_data[key][dkey]['sas'][i] = sas self.array_data[key][dkey]['tot'][i] = tot def load_mmpbsa_info(fname): """ Loads up an MMPBSA.py info file and returns a mmpbsa_data instance with all of the data available in numpy arrays if numpy is available. The returned object is a mmpbsa_data instance. mmpbsa_data attributes: ----------------------- o Derived from "dict" o Each solvent model is a dictionary key for a numpy array (if numpy is available) or array.array (if numpy is unavailable) for each of the species (complex, receptor, ligand) present in the calculation. o The alanine scanning mutant data is under another dict denoted by the 'mutant' key. Data Layout: ------------ Solvent Model | Dictionary Key | Data Keys Available ------------------------------------------------------------------- Generalized Born | 'gb' | EGB, ESURF, * Poisson-Boltzmann | 'pb' | EPB, EDISPER, ECAVITY, * 3D-RISM (GF) | 'rism gf' | 3D-RISM (Standard)| 'rism std' | Normal Mode | 'nmode' | Quasi-harmonic | 'qh' | * == TOTAL, VDW, EEL, 1-4 EEL, 1-4 VDW, BOND, ANGLE, DIHED The keys above are entries for the main dict as well as the sub-dict whose key is 'mutant' in the main dict. Each entry in the main (and mutant sub-) dict is, itself, a dict with 1 or 3 keys; 'complex', 'receptor', 'ligand'; where 'receptor' and 'ligand' are missing for stability calculations. If numpy is available, all data will be numpy.ndarray instances. Otherwise, all data will be array.array instances. All of the objects referenced by the listed 'Dictionary Key's are dicts in which the listed 'Data Keys Available' are keys to the data arrays themselves Examples: --------- # Load numpy for our analyses (optional) import numpy as np # Load the _MMPBSA_info file: mydata = load_mmpbsa_info('_MMPBSA_info') # Access the complex GB data structure and calculate the autocorr. fcn. autocorr = np.correlate(mydata['gb']['complex']['TOTAL'], mydata['gb']['complex']['TOTAL']) # Calculate the standard deviation of the alanine mutant receptor in PB print mydata.mutant['pb']['receptor']['TOTAL'].std() """ if not HAS_NUMPY: warnings.warn('numpy was not found. Data will be packed in normal Python ' 'arrays. Install numpy for more efficient array handling.', NotImplemented) if not isinstance(fname, str): raise TypeError('load_mmpbsa_info requires a MMPBSA.py info file name!') if not os.path.exists(fname): raise NoFileExists("cannot find %s!" % fname) app = main.MMPBSA_App(MPI) info = infofile.InfoFile(app) info.read_info(fname) app.loadcheck_prmtops() app.parse_output_files() return_data = mmpbsa_data(app) # Since Decomp data is parsed in a memory-efficient manner (by not storing # all of the data in arrays, but rather by printing each data point as it's # parsed), we need to handle the decomp data separately here if app.INPUT['decomprun']: # Simplify the decomp class instance creation if app.INPUT['idecomp'] in (1, 2): DecompClass = lambda x, y: APIDecompOut(x, y, app.mpi_size, app.INPUT['dec_verbose'], app.numframes) else: DecompClass = lambda x, y: APIPairDecompOut(x, y, app.mpi_size, app.INPUT['dec_verbose'], app.numframes) if not app.INPUT['mutant_only']: # Do normal GB if app.INPUT['gbrun']: return_data['decomp'] = {'gb' : {}} return_data['decomp']['gb']['complex'] = DecompClass( app.FILES.prefix + 'complex_gb.mdout', app.INPUT['surften']).array_data if not app.stability: return_data['decomp']['gb']['receptor'] = DecompClass( app.FILES.prefix + 'receptor_gb.mdout', app.INPUT['surften']).array_data return_data['decomp']['gb']['ligand'] = DecompClass( app.FILES.prefix + 'ligand_gb.mdout', app.INPUT['surften']).array_data # Do normal PB if app.INPUT['pbrun']: return_data['decomp'] = {'pb' : {}} return_data['decomp']['pb']['complex'] = DecompClass( app.FILES.prefix + 'complex_pb.mdout', app.INPUT['surften']).array_data if not app.stability: return_data['decomp']['pb']['receptor'] = DecompClass( app.FILES.prefix + 'receptor_pb.mdout', app.INPUT['surften']).array_data return_data['decomp']['pb']['ligand'] = DecompClass( app.FILES.prefix + 'ligand_pb.mdout', app.INPUT['surften']).array_data if app.INPUT['alarun']: # Do mutant GB if app.INPUT['gbrun']: return_data.mutant['decomp'] = {'gb' : {}} return_data.mutant['decomp']['gb']['complex'] = DecompClass( app.FILES.prefix + 'mutant_complex_gb.mdout', app.INPUT['surften']).array_data if not app.stability: return_data.mutant['decomp']['gb']['receptor'] = DecompClass( app.FILES.prefix + 'mutant_receptor_gb.mdout', app.INPUT['surften']).array_data return_data.mutant['decomp']['gb']['ligand'] = DecompClass( app.FILES.prefix + 'mutant_ligand_gb.mdout', app.INPUT['surften']).array_data # Do mutant PB if app.INPUT['pbrun']: return_data.mutant['decomp'] = {'pb' : {}} return_data.mutant['decomp']['pb']['complex'] = DecompClass( app.FILES.prefix + 'mutant_complex_pb.mdout', app.INPUT['surften']).array_data if not app.stability: return_data.mutant['decomp']['pb']['receptor'] = DecompClass( app.FILES.prefix + 'mutant_receptor_pb.mdout', app.INPUT['surften']).array_data return_data.mutant['decomp']['pb']['ligand'] = DecompClass( app.FILES.prefix + 'mutant_ligand_pb.mdout', app.INPUT['surften']).array_data return return_data