// $Id: mmdb_utils.h $ // ================================================================= // // CCP4 Coordinate Library: support of coordinate-related // functionality in protein crystallography applications. // // Copyright (C) Eugene Krissinel 2000-2008. // // This library is free software: you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License version 3, modified in accordance with the provisions // of the license to address the requirements of UK law. // // You should have received a copy of the modified GNU Lesser // General Public License along with this library. If not, copies // may be downloaded from http://www.ccp4.ac.uk/ccp4license.php // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Lesser General Public License for more details. // // ================================================================= // // 23.10.15 <-- Date of Last Modification. // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // ----------------------------------------------------------------- // // **** Module : MMDBF_Utils // ~~~~~~~~~ // **** Project : MacroMolecular Data Base (MMDB) // ~~~~~~~~~ // // **** Classes : mmdb::ContainerClass ( containered class template ) // ~~~~~~~~~ mmdb::ContString ( containered string ) // mmdb::ClassContainer ( container of classes ) // mmdb::AtomPath ( atom path ID ) // mmdb::QuickSort ( quick sort of integers ) // // **** Functions : Date9to11 ( DD-MMM-YY -> DD-MMM-YYYY ) // ~~~~~~~~~~~ Date11to9 ( DD-MMM-YYYY -> DD-MMM-YY ) // Date9toCIF ( DD-MMM-YY -> YYYY-MM-DD ) // Date11toCIF( DD-MMM-YYYY -> YYYY-MM-DD ) // DateCIFto9 ( YYYY-MM-DD -> DD-MMM-YY ) // DateCIFto11( YYYY-MM-DD -> DD-MMM-YYYY ) // GetInteger ( reads integer from a string ) // GetReal ( reads real from a string ) // GetIntIns ( reads integer and insert code ) // PutInteger ( writes integer into a string ) // PutRealF ( writes real in F-form into a string ) // PutIntIns ( writes integer and insert code ) // CIFGetInteger ( reads and deletes int from CIF ) // CIFGetReal ( reads and deletes real from CIF ) // CIFGetString ( reads and deletes string from CIF) // CIFGetInteger1 (reads and del-s int from CIF loop) // CIFGetReal1 (reads and del-s int from CIF loop) // Mat4Inverse ( inversion of 4x4 matrices ) // GetErrorDescription (ascii line to an Error_XXXXX) // ParseAtomID ( parses atom ID line ) // ParseResID ( parses residue ID line ) // ParseAtomPath ( parses full atom path ) // // (C) E. Krissinel 2000-2015 // // ================================================================= // #ifndef __MMDB_Utils__ #define __MMDB_Utils__ #include "mmdb_io_stream.h" #include "mmdb_mmcif_.h" #include "mmdb_defs.h" namespace mmdb { // ================== Date functions =================== // converts DD-MMM-YY to DD-MMM-YYYY; appends terminating zero extern void Date9to11 ( cpstr Date9, pstr Date11 ); // converts DD-MMM-YYYY to DD-MMM-YY; does not append terminating zero extern void Date11to9 ( cpstr Date11, pstr Date9 ); // converts DD-MMM-YY to YYYY-MM-DD; appends terminating zero extern void Date9toCIF ( cpstr Date9, pstr DateCIF ); // converts DD-MMM-YYYY to YYYY-MM-DD; appends terminating zero extern void Date11toCIF ( cpstr Date11, pstr DateCIF ); // converts YYYY-MM-DD to DD-MMM-YY; appends terminating zero extern void DateCIFto9 ( cpstr DateCIF, pstr Date9 ); // converts YYYY-MM-DD to DD-MMM-YYYY; appends terminating zero extern void DateCIFto11 ( cpstr DateCIF, pstr Date11 ); // ================= Format functions ================== // Returns true if S contains an integer number in its // first M characters. This number is returned in N. // The return is false if no integer number may be // recognized. In this case, N is assigned MinInt4 value. extern bool GetInteger ( int & N, cpstr S, int M ); // Returns true if S contains a real number in its // first M characters. This number is returned in R. // The return is false if no real number may be // recognized. In this case, R is assigned -MaxReal value. extern bool GetReal ( realtype & R, cpstr S, int M ); // Returns true if S contains an integer number in its // first M characters. This number is returned in N. In addition // to that, GetIntIns() retrieves the insertion code which may // follow the integer and returns it in "ins" (1 character + // terminating 0). // The return is false if no integer number may be // recognized. In this case, N is assigned MinInt4 value, // "ins" just returns (M+1)th symbol of S (+terminating 0). extern bool GetIntIns ( int & N, pstr ins, cpstr S, int M ); // Integer N is converted into ASCII string of length M // and pasted onto first M characters of string S. No // terminating zero is added. // If N is set to MinInt4, then first M characters of // string S are set to space. extern void PutInteger ( pstr S, int N, int M ); // Real R is converted into ASCII string of length M // and pasted onto first M characters of string S. No // terminating zero is added. The conversion is done // according to fixed format FM.L // If R is set to -MaxReal, then first M characters of // string S are set to the space character. extern void PutRealF ( pstr S, realtype R, int M, int L ); // Integer N is converted into ASCII string of length M // and pasted onto first M characters of string S. No // terminating zero is added. The insert code ins is put // immediately after the integer. // If N is set to MinInt4, then first M+1 characters of // string S are set to space, and no insert code are // appended. extern void PutIntIns ( pstr S, int N, int M, cpstr ins ); // CIFInteger(..), CIFReal(..) and CIFGetString(..) automate // extraction and analysis of data from CIF file. If the data // is erroneous or absent, they store an error message in // CIFErrorLocation string (below) and return non-zero. extern ERROR_CODE CIFGetInteger ( int & I, mmcif::PStruct Struct, cpstr Tag, bool Remove=true ); extern ERROR_CODE CIFGetReal ( realtype & R, mmcif::PStruct Struct, cpstr Tag, bool Remove=true ); extern ERROR_CODE CIFGetString ( pstr S, mmcif::PStruct Struct, cpstr Tag, int SLen, cpstr DefS, bool Remove=true ); extern ERROR_CODE CIFGetInteger ( int & I, mmcif::PLoop Loop, cpstr Tag, int & Signal ); extern ERROR_CODE CIFGetIntegerD ( int & I, mmcif::PLoop Loop, cpstr Tag, int defValue=MinInt4 ); extern ERROR_CODE CIFGetInteger1 ( int & I, mmcif::PLoop Loop, cpstr Tag, int nrow ); extern ERROR_CODE CIFGetReal ( realtype & R, mmcif::PLoop Loop, cpstr Tag, int & Signal ); extern ERROR_CODE CIFGetReal1 ( realtype & R, mmcif::PLoop Loop, cpstr Tag, int nrow ); extern ERROR_CODE CIFGetString ( pstr S, mmcif::PLoop Loop, cpstr Tag, int row, int SLen, cpstr DefS ); // Calculates AI=A^{-1} extern void Mat4Inverse ( const mat44 & A, mat44 & AI ); // Calculates A=B*C extern void Mat4Mult ( mat44 & A, const mat44 & B, const mat44 & C ); // Calculates A=B^{-1}*C extern void Mat4Div1 ( mat44 & A, const mat44 & B, const mat44 & C ); // Calculates A=B*C^{-1} extern void Mat4Div2 ( mat44 & A, const mat44 & B, const mat44 & C ); // Calculates determinant of the rotation part extern realtype Mat4RotDet ( mat44 & T ); // Sets up a unit matrix extern void Mat4Init ( mat44 & A ); extern void Mat3Init ( mat33 & A ); // Calculates AI=A^{-1}, returns determinant extern realtype Mat3Inverse ( const mat33 & A, mat33 & AI ); extern bool isMat4Unit ( const mat44 & A, realtype eps, bool rotOnly ); // Copies A into AC extern void Mat4Copy ( const mat44 & A, mat44 & ACopy ); extern void Mat3Copy ( const mat33 & A, mat33 & ACopy ); extern bool isMat4Eq ( const mat44 & A, const mat44 & B, realtype eps, bool rotOnly ); extern void TransformXYZ ( const mat44 & T, realtype & X, realtype & Y, realtype & Z ); extern realtype TransformX ( const mat44 & T, realtype X, realtype Y, realtype Z ); extern realtype TransformY ( const mat44 & T, realtype X, realtype Y, realtype Z ); extern realtype TransformZ ( const mat44 & T, realtype X, realtype Y, realtype Z ); extern char CIFErrorLocation[200]; // Returns ASCII string explaining the nature of // Error_xxxx error code. extern cpstr GetErrorDescription ( ERROR_CODE ErrorCode ); // ================ ContainerClass ==================== DefineClass(ContainerClass); DefineStreamFunctions(ContainerClass); class ContainerClass : public io::Stream { friend class ClassContainer; public : ContainerClass (); ContainerClass ( io::RPStream Object ); ~ContainerClass() {} // ConvertPDBASCII(..) will return one of the Error_XXXXX // constants, see virtual ERROR_CODE ConvertPDBASCII ( cpstr ) { return Error_NoError; } virtual void PDBASCIIDump ( pstr, int ) {} virtual bool PDBASCIIDump1 ( io::RFile ) { return false; } virtual void MakeCIF ( mmcif::PData, int ) {} // Append(..) should return true if CC is appended to this class. // If this is not the case, CC is merely put on the top of // container. // Note: Append(..) detects the necessity to append CC and // performs all the necessary actions for that. The rest of CC // will be disposed by Class Container. // Note: Class Container checks every new class, which is // being added to it (see CClassContainer::AddData(..)), only // against the top of container. virtual bool Append ( PContainerClass CC ); // GetCIF(..) extracts any necessary information from CIF and // returns in Signal: // Error_noError : the information was successfully extracted, // this instance of container class should be // stored, and unchanged value of Signal should // be passed to the next (newly created) instance // of this container class. // Error_EmptyCIF : there is no information for this type of // containers to extract. This instance of // container class should be deleted and input // for this type of container class terminated. // Other : the corresponding error. This instance of // container class should be deleted and the // whole input stopped. virtual ERROR_CODE GetCIF ( mmcif::PData, int & n ) { n = -1; return Error_EmptyCIF; } virtual CLASS_ID GetClassID () { return ClassID_Template; } virtual void Copy ( PContainerClass ) {} void write ( io::RFile ) {} void read ( io::RFile ) {} protected : int ContinuationNo; }; // ======================== ContString ========================= DefineClass(ContString); DefineStreamFunctions(ContString); class ContString : public ContainerClass { public : pstr Line; // a string ContString (); ContString ( cpstr S ); ContString ( io::RPStream Object ); ~ContString(); ERROR_CODE ConvertPDBASCII ( cpstr S ); void PDBASCIIDump ( pstr S, int N ); bool PDBASCIIDump1 ( io::RFile f ); void MakeCIF ( mmcif::PData CIF, int N ); // void GetCIF1 ( mmcif::PData CIF, ERROR_CODE & Signal, // int & pos ); bool Append ( PContainerClass ContString ); CLASS_ID GetClassID () { return ClassID_String; } void Copy ( PContainerClass CString ); void write ( io::RFile f ); void read ( io::RFile f ); protected : pstr CIFCategory,CIFTag; void InitString(); }; // ============== ClassContainer ==================== DefineClass(ClassContainer); DefineStreamFunctions(ClassContainer); class ClassContainer : public io::Stream { public : ClassContainer (); ClassContainer ( io::RPStream Object ); ~ClassContainer (); void FreeContainer (); void AddData ( PContainerClass Data ); virtual void PDBASCIIDump ( io::RFile f ); virtual void MakeCIF ( mmcif::PData CIF ); // GetCIF(..) will return one of the Error_XXXXX constants, // see virtual ERROR_CODE GetCIF ( mmcif::PData CIF, int ClassID ); virtual PContainerClass MakeContainerClass ( int ClassID ); // Copy will empty the class if parameter is set to NULL virtual void Copy ( PClassContainer CContainer ); inline int Length() { return length; } PContainerClass GetContainerClass ( int ContClassNo ); void write ( io::RFile f ); void read ( io::RFile f ); protected : int length; PPContainerClass Container; void Init(); }; // ====================== ID parsers ========================== DefineClass(AtomPath); DefineStreamFunctions(AtomPath); enum APATH_FLAG { APATH_ModelNo = 0x00000001, APATH_ChainID = 0x00000002, APATH_SeqNum = 0x00000004, APATH_InsCode = 0x00000008, APATH_ResName = 0x00000010, APATH_AtomName = 0x00000020, APATH_Element = 0x00000040, APATH_AltLoc = 0x00000080, APATH_Incomplete = 0x00000100, APATH_WC_ModelNo = 0x00001000, APATH_WC_ChainID = 0x00002000, APATH_WC_SeqNum = 0x00004000, APATH_WC_InsCode = 0x00008000, APATH_WC_ResName = 0x00010000, APATH_WC_AtomName = 0x00020000, APATH_WC_Element = 0x00040000, APATH_WC_AltLoc = 0x00080000 }; class AtomPath : public io::Stream { public : int modelNo; ChainID chainID; int seqNum; InsCode insCode; ResName resName; AtomName atomName; Element element; AltLoc altLoc; int isSet; AtomPath (); AtomPath ( cpstr ID ); AtomPath ( io::RPStream Object ); ~AtomPath (); // SetPath(..) parses the Atom Path ID string, which // may be incomplete. Below {..} means blocks that // may be omitted; any elements within such blocks // may be omitted as well. // // 1. If ID starts with '/' then the ID must be of // the following form: // /mdl{/chn{/seq(res).i{/atm[elm]:a}}} // // 2. If ID starts with a letter: // chn{/seq(res).i{/atm[elm]:a}} // // 3. If ID starts with a number or '(': // seq(res).i{/atm[elm]:a} // // 4. If ID contains colon ':' or '[' then // it may be just // atm[elm]:a // // The following are valid samples of IDs: // // /1 model number 1 // /1/A/23(GLU).A/CA[C]:A model number 1, chain A, // residue 23 GLU insertion code A, C-alpha // atom in alternative location A // A/23 residue 23 of chain A // CA[C]: atom C-alpha // [C] a carbon // *[C]:* same as above // :A an atom with insertion code A // 5 residue number 5 // (GLU) residue GLU // // All spaces are ignored. SetPath(..) sets bit of isSet // for each element present. Any element may be a wildcard // '*'. Wildcard for model will set modelNo=0, for sequence // number will set seqNum=MinInt4. // // Returns: // 0 <-> Ok // -1 <-> wrong numerical format for model // -2 <-> wrong numerical format for sequence number int SetPath ( cpstr ID ); void write ( io::RFile f ); void read ( io::RFile f ); protected : void InitAtomPath(); }; // -------------------------------------------------------------- DefineClass(QuickSort); class QuickSort : public io::Stream { public : QuickSort (); QuickSort ( io::RPStream Object ); ~QuickSort() {} virtual int Compare ( int i, int j ); virtual void Swap ( int i, int j ); void Sort ( void * sortdata, int data_len ); protected : int selSortLimit,dlen; void * data; void SelectionSort ( int left, int right ); int Partition ( int left, int right ); void Quicksort ( int left, int right ); }; // -------------------------------------------------------------- extern void takeWord ( pstr & p, pstr wrd, cpstr ter, int l ); // ParseAtomID(..) reads the atom ID of the following form: // {name} {[element]} {:altcode} // (here {} means that the item may be omitted; any field may have // value of wildcard '*'), and returns the atom name in aname, // element name - in elname, and alternate location code - in aloc. // Except for the alternate location code, missing items are // replaced by wildcards. Missing alternate location code is // returned as empty string "". // Leading spaces are allowed; any other space will terminate // the parsing. // The followings are perfectly valid atom IDs: // CA[C]:A (carbon C_alpha in location A) // CA[*]:A (either C_alpha or Ca in location A) // CA:A (same as above) // CA (either C_alpha or Ca with no location indicator) // CA[] (same as above) // CA[C]: (C_alpha with no location indicator) // [C] (any carbon with no location indicator) // [C]:* (any carbon with any location indicator) // *[C]:* (same as above) // :A (any atom in location A) // *[*]:A (same as above) // *[*]:* (any atom) // * (any atom with no alternate location indicator) extern void ParseAtomID ( cpstr ID, AtomName aname, Element elname, AltLoc aloc ); // ParseResID(..) reads the residue ID of the following form: // {seqnum} {(name)} {.inscode} // (here {} means that the item may be omitted; any field may have // value of wildcard '*'), and returns the sequence number in sn, // insertion code - in inscode, and residue name - in resname. // If a wildcard was specified for the sequence number, then // ParseResID(..) returns 1. Missing residue name is replaced by // the wildcard '*', and misisng insertion code is returned as empty // string "". // Leading spaces are allowed; any other space will terminate // the parsing. // Return 0 means Ok, 1 - wildcard for the sequence number, // 2 - an error in numerical format of the sequence number // (other items are parsed). // The followings are perfectly valid residue IDs: // 27(ALA).A (residue 27A ALA) // 27().A (residue 27A) // 27(*).A (same as above) // 27.A (same as above) // 27 (residue 27) // 27(). (same as above) // (ALA) (any ALA without insertion code) // (ALA). (same as above) // (ALA).* (any ALA) // *(ALA).* (any ALA) // .A (any residue with insertion code A) // *(*).A (same as above) // *(*).* (any residue) // * (any residue with no insertion code) extern int ParseResID ( cpstr ID, int & sn, InsCode inscode, ResName resname ); // ParseAtomPath(..) parses an atom path string of the following // structure: // /mdl/chn/seq(res).i/atm[elm]:a // where all items may be represented by a wildcard '*' and // mdl - model number (mandatory); at least model #1 is always // present; returned in mdl; on a wildcard mdl is set to 0 // chn - chain identifier ( mandatory); returned in chn; on a // wildcard chn is set to '*' // seq - residue sequence number (mandatory); returned in sn; // on a wild card ParseAtomPath(..) returns 1 // (res) - residue name in round brackets (may be omitted); // returnded in res; on a wildcard res is set to '*' // .i - insert code after a dot; if '.i' or 'i' is missing // then a residue without an insertion code is looked for; // returned in ic; on a wildcard (any insertion code would // do) ic is set to '*' // atm - atom name (mandatory); returned in atm; on a wildcard // atm is set to '*' // [elm] - chemical element code in square brackets; it may // be omitted but could be helpful for e.g. // distinguishing C_alpha and CA; returned in elm; // in a wildcard elm is set to '*' // :a - alternate location indicator after colon; if // ':a' or 'a' is missing then an atom without // alternate location indicator is looked for; returned // in aloc; on a wildcard (any alternate code would do) // aloc is set to '*'. // All spaces are ignored, all identifiers should be in capital // letters (comparisons are case-sensitive). // The atom path string may be incomplete. If DefPath is supplied, // the function will try to get missing elements from there. If // missing items may not be found in DefPath, they are replaced by // wildcards. // ParseAtomPath(..) returns the following bits: // 0 - Ok // APATH_Incomplete - if path contains wildcards. Wildcards for // residue name and chemical element will be // ignored here if sequence number and // atom name, correspondingly, are provided. // APATH_WC_XXXXX - wildcard for different elements // -1 - wrong numerical format for model (fatal) // -2 - wrong numerical format for seqNum (fatal) extern int ParseAtomPath ( cpstr ID, int & mdl, ChainID chn, int & sn, InsCode ic, ResName res, AtomName atm, Element elm, AltLoc aloc, PAtomPath DefPath=NULL ); extern int ParseSelectionPath ( cpstr CID, int & iModel, pstr Chains, int & sNum1, InsCode ic1, int & sNum2, InsCode ic2, pstr RNames, pstr ANames, pstr Elements, pstr altLocs ); extern void MakeSelectionPath ( pstr CID, int iModel, cpstr Chains, int sNum1, const InsCode ic1, int sNum2, const InsCode ic2, cpstr RNames, cpstr ANames, cpstr Elements, cpstr altLocs ); } // namespace mmdb #endif