# $Id$ # # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC # # @@ All Rights Reserved @@ # This file is part of the RDKit. # The contents are covered by the terms of the BSD license # which is included in the file license.txt, found at the root # of the RDKit source tree. # """ Atom-based calculation of LogP and MR using Crippen's approach Reference: S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) """ from __future__ import print_function import os from rdkit import RDConfig from rdkit import Chem from rdkit.Chem import rdMolDescriptors import numpy _smartsPatterns = {} _patternOrder = [] # this is the file containing the atom contributions defaultPatternFileName = os.path.join(RDConfig.RDDataDir,'Crippen.txt') def _ReadPatts(fileName): """ *Internal Use Only* parses the pattern list from the data file """ patts = {} order = [] with open(fileName,'r') as f: lines = f.readlines() for line in lines: if line[0] != '#': splitLine = line.split('\t') if len(splitLine)>=4 and splitLine[0] != '': sma = splitLine[1] if sma!='SMARTS': sma.replace('"','') try: p = Chem.MolFromSmarts(sma) except: pass else: if p: if len(splitLine[0])>1 and splitLine[0][1] not in 'S0123456789': cha = splitLine[0][:2] else: cha = splitLine[0][0] logP = float(splitLine[2]) if splitLine[3] != '': mr = float(splitLine[3]) else: mr = 0.0 if cha not in order: order.append(cha) l = patts.get(cha,[]) l.append((sma,p,logP,mr)) patts[cha] = l else: print('Problems parsing smarts: %s'%(sma)) return order,patts _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs def _pyGetAtomContribs(mol,patts=None,order=None,verbose=0,force=0): """ *Internal Use Only* calculates atomic contributions to the LogP and MR values if the argument *force* is not set, we'll use the molecules stored _crippenContribs value when possible instead of re-calculating. **Note:** Changes here affect the version numbers of MolLogP and MolMR as well as the VSA descriptors in Chem.MolSurf """ if not force and hasattr(mol,'_crippenContribs'): return mol._crippenContribs if patts is None: patts = _smartsPatterns order = _patternOrder nAtoms = mol.GetNumAtoms() atomContribs = [(0.,0.)]*nAtoms doneAtoms=[0]*nAtoms nAtomsFound=0 done = False for cha in order: pattVect = patts[cha] for sma,patt,logp,mr in pattVect: #print('try:',entry[0]) for match in mol.GetSubstructMatches(patt,False,False): firstIdx = match[0] if not doneAtoms[firstIdx]: doneAtoms[firstIdx]=1 atomContribs[firstIdx] = (logp,mr) if verbose: print('\tAtom %d: %s %4.4f %4.4f'%(match[0],sma,logp,mr)) nAtomsFound+=1 if nAtomsFound>=nAtoms: done=True break if done: break mol._crippenContribs = atomContribs return atomContribs def _Init(): global _smartsPatterns,_patternOrder if _smartsPatterns == {}: _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) def _pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1): """ DEPRECATED """ if addHs < 0: mol = Chem.AddHs(inMol,1) elif addHs > 0: mol = Chem.AddHs(inMol,0) else: mol = inMol if patts is None: global _smartsPatterns,_patternOrder if _smartsPatterns == {}: _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) patts = _smartsPatterns order = _patternOrder atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) return numpy.sum(atomContribs,0)[0] _pyMolLogP.version="1.1.0" def _pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1): """ DEPRECATED """ if addHs < 0: mol = Chem.AddHs(inMol,1) elif addHs > 0: mol = Chem.AddHs(inMol,0) else: mol = inMol if patts is None: global _smartsPatterns,_patternOrder if _smartsPatterns == {}: _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) patts = _smartsPatterns order = _patternOrder atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) return numpy.sum(atomContribs,0)[1] _pyMolMR.version="1.1.0" MolLogP=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[0] MolLogP.version=rdMolDescriptors._CalcCrippenDescriptors_version MolLogP.__doc__=""" Wildman-Crippen LogP value Uses an atom-based scheme based on the values in the paper: S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) **Arguments** - inMol: a molecule - addHs: (optional) toggles adding of Hs to the molecule for the calculation. If true, hydrogens will be added to the molecule and used in the calculation. """ MolMR=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[1] MolMR.version=rdMolDescriptors._CalcCrippenDescriptors_version MolMR.__doc__=""" Wildman-Crippen MR value Uses an atom-based scheme based on the values in the paper: S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) **Arguments** - inMol: a molecule - addHs: (optional) toggles adding of Hs to the molecule for the calculation. If true, hydrogens will be added to the molecule and used in the calculation. """ if __name__=='__main__': import sys if len(sys.argv): ms = [] verbose=0 if '-v' in sys.argv: verbose=1 sys.argv.remove('-v') for smi in sys.argv[1:]: ms.append((smi,Chem.MolFromSmiles(smi))) for smi,m in ms: print('Mol: %s'%(smi)) logp = MolLogP(m,verbose=verbose) print('----') mr = MolMR(m,verbose=verbose) print('Res:',logp,mr) newM = Chem.AddHs(m) logp = MolLogP(newM,addHs=0) mr = MolMR(newM,addHs=0) print('\t',logp,mr) print('-*-*-*-*-*-*-*-*')