/*************************************************************************** pdb.cpp - description ------------------- begin : Mon Feb 11 14:09:48 2002 copyright : (C) 2002 by Cavalli Andrea email : cavalli.unizh.ch **************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include #include #include #include #include #include #ifdef HAVE_BZLIB_H #include #endif #include using namespace Almost; void rename__frag(PDBFragment & frag, string a1, string a2){ for(size_t i=0;i split(string s,char c=' '){ string t; vector r; size_t i=0; while(i1){ size_t nsegm = model[0]->size(); //insegments for(size_t i=1;isize();++i){ if(model[i]->size()!= nsegm ){ nsegm_check=false; break;} } if(!nsegm_check){ cout<<"PDB WARNING: the number of segment is not the same in all MODELS!"<1){ size_t nsegm = model[0]->size(); //insegments for(size_t i=1;isize();++i){ if(model[i]->size()!= nsegm ){ nsegm_check=false; break;} } if(!nsegm_check){ cout<<"PDB WARNING: the number of segment is not the same in all MODELS!"<>tmp; str = "PDB file contains "+tmp+" model(s).\n"; for(size_t i=0;i>tmp2; str += "PDB model "+ tmp2 + " contains "; } { strstream out2; out2<< model[i]->size(); string tmp2; out2>>tmp2; str += tmp2 + " segment(s)\n"; for(size_t s = 0; ssize();s++){ str += (*model[i])[s].chainID()+" "; } str +="\n"; } } return str; } int PDB::align_seqres(string c, int m, int i, string &r){ using namespace boost; if(m>=(*this).size()) return -1; if(i>=(*this)[m].size()) return -1; PDBSegment & seg = (*this)[m][i]; if(seg.size()==0) return -1; r = three2one(seg[0].name()); int fseq = seg[0].fragSeq(); for(int i=1;ifseq)) return -1; for(int k=fseq+1;k seq1 = (*this)[m][i].sequence(PDBSegment::SHORT); vector seq_res = seqres(c); string seq; for(size_t i=0;i what; match_flag_type flags = match_default; regex exp(r); string::const_iterator begin = seq.begin(),end = seq.end(); if(regex_search(begin,end,what,exp,flags)){ return what[0].first - seq.begin(); } else return -1; } void PDB::read(istream &in){ PDBAtomLine * atomLine; // PDBHeaderLine * headerLine; while(in){ PDBLine line; line.readLine(in); int type = line.type(); switch(type){ case PDBType::HEADER: header_.push_back(PDBHeaderLine(line.line())); break; case PDBType::TITLE: title_.push_back(PDBTitleLine(line.line())); break; case PDBType::COMPND: compound_.push_back(PDBCompndLine(line.line())); break; case PDBType::SOURCE: source_.push_back(PDBSourceLine(line.line())); break; case PDBType::KEYWDS: keywords_.push_back(PDBKeywordsLine(line.line())); break; case PDBType::EXPDTA: expdata_.push_back(PDBExpdataLine(line.line())); break; case PDBType::AUTHOR: author_.push_back(PDBAuthorLine(line.line())); break; case PDBType::JRNL: journal_.push_back(PDBJournalLine(line.line())); break; case PDBType::REMARK: remark_.push_back(PDBRemarkLine(line.line())); break; case PDBType::SEQRES: seqres_.push_back(PDBSeqResLine(line.line())); break; case PDBType::MODEL: currentPDBModel = new PDBModel(); break; case PDBType::ENDMDL: model.push_back(currentPDBModel); segName=""; chainID = ""; currentPDBModel =NULL; currentPDBSegment = NULL; break; case PDBType::TER: // currentPDBModel->segment.push_back(currentPDBSegment); currentPDBSegment = NULL; segName=""; chainID = ""; break; case PDBType::END: if(model.size()==0){ if(currentPDBModel->size()>0) model.push_back(currentPDBModel); } break; case PDBType::ATOM: case PDBType::HETATM: if(type==PDBType::HETATM&&!keep_hetatm_){ // cout<<"DEBUG discarding HETATM"<altLoc !=' ')&&(atomLine->altLoc !='A')){ // cout <<"Alt location.....ignored"<serial<<" " // <name<<" " // <resName<<"\n"; if(currentPDBModel == NULL) currentPDBModel = new PDBModel(); if(currentPDBSegment == NULL){ currentPDBSegment = new PDBSegment(); currentPDBModel->segment.push_back(currentPDBSegment); segName = atomLine->segment(); chainID = atomLine->chainID; currentPDBSegment->set_name(segName); currentPDBSegment->set_chainID(chainID); // cout<<"Segment "<fragment.push_back(currentPDBFragment); fragName = atomLine->resName; fragSeq = atomLine->resSeq; // cout<<"Framgment "<set_name(fragName); currentPDBFragment->set_fragSeq(fragSeq); currentPDBFragment->set_iCode(atomLine->iCode); } else if((segName!=atomLine->segment())){ //if RESNAME and REDNUM unchanged ... ignore string old_res_name = currentPDBSegment->fragment.back()->name(); string current_res_name = atomLine->resName; if((old_res_name!=current_res_name)&& (currentPDBSegment->fragment.back()->fragSeq() >atomLine->resSeq)){ currentPDBSegment = new PDBSegment(); currentPDBModel->segment.push_back(currentPDBSegment); segName = atomLine->segment(); chainID = atomLine->chainID; currentPDBSegment->set_name(segName); currentPDBSegment->set_chainID(chainID); // cout<<"Segment "<fragment.push_back(currentPDBFragment); fragName = atomLine->resName; fragSeq = atomLine->resSeq; // cout<<"Framgment "<set_name(fragName); currentPDBFragment->set_fragSeq(fragSeq); currentPDBFragment->set_iCode(atomLine->iCode); } } if((fragName != atomLine->resName)||(fragSeq != atomLine->resSeq)){ currentPDBFragment = new PDBFragment(); currentPDBSegment->fragment.push_back(currentPDBFragment); fragName = atomLine->resName; // cout<<"Framgment "<resSeq; currentPDBFragment->set_name(fragName); currentPDBFragment->set_fragSeq(fragSeq); currentPDBFragment->set_iCode(atomLine->iCode); } currentPDBFragment->atom.push_back(atomLine); //cout<name<0) multiPDBModel = true; if(model.size()==0){ if(currentPDBModel->size()>0) model.push_back(currentPDBModel); else{ cout <<"no model found"< PDBSegment::sequence(int TYPE=LONG) const { //content access string PDB::header() const { string s; for(size_t i=0;i a = PDBUtils::split(author_[i].text,','); for(size_t j=0;j PDB::seqres(string c) const { string s; if(seqres_.size()==0) return vector(); if(c==""||c==" "){ for(size_t i=0;i PDB::remarks() const { vector res; for(size_t i=1;i=0){ // if((*this)[i][j][k].resSeq!= resSeq) break; // else if((*this)[i][j][k].name =="O") { // (*this)[i][j][k].name = "OT1"; // break; // } // else --k; // } // } // } // } } void PDB::normalizeCh22(){ // for(int i=0;i<(*this).size();++i){ // for(int j=0;j<(*this)[i].size();++j){ // bool patched = false; // for(int k=0;k<(*this)[i][j].size();++k){ // //loop on PDBAtomLine // string name = (*this)[i][j][k].name; // if(name == "H") (*this)[i][j][k].name = "HN"; // else if(name =="H1") (*this)[i][j][k].name = "HT1"; // else if(name =="H2") (*this)[i][j][k].name = "HT2"; // else if(name =="H3") (*this)[i][j][k].name = "HT3"; // else if(name =="OXT"){ // (*this)[i][j][k].name = "OT2"; // patched = true; // } // if((*this)[i][j][k].resName =="ILE"){ // if(name =="CD1") (*this)[i][j][k].name = "CD"; // else if(name =="HD11") (*this)[i][j][k].name = "HD1"; // else if(name =="HD12") (*this)[i][j][k].name = "HD2"; // else if(name =="HD13") (*this)[i][j][k].name = "HD3"; // } // } // //change names in terminal ........ // if(patched){ // int k = (*this)[i][j].size()-1; // string resName = (*this)[i][j][k].resName; // int resSeq = (*this)[i][j][k].resSeq; // while(k>=0){ // if((*this)[i][j][k].resSeq!= resSeq) break; // else if((*this)[i][j][k].name =="O") { // (*this)[i][j][k].name = "OT1"; // break; // } // else --k; // } // } // } // } } bool comp(string a1,string a2){ for(size_t i=0;i three2one_map; int init_three2one(){ three2one_map["ALA"] = "A"; three2one_map["ASX"] = "B"; three2one_map["CYS"] = "C"; three2one_map["ASP"] = "D"; three2one_map["GLU"] = "E"; three2one_map["PHE"] = "F"; three2one_map["GLY"] = "G"; three2one_map["HIS"] = "H"; three2one_map["HSD"] = "H"; three2one_map["ILE"] = "I"; three2one_map["LYS"] = "K"; three2one_map["LEU"] = "L"; three2one_map["MET"] = "M"; three2one_map["ASN"] = "N"; three2one_map["PRO"] = "P"; three2one_map["GLN"] = "Q"; three2one_map["ARG"] = "R"; three2one_map["SER"] = "S"; three2one_map["THR"] = "T"; three2one_map["VAL"] = "V"; three2one_map["TRP"] = "W"; three2one_map["TYR"] = "Y"; three2one_map["GLX"] = "Z"; three2one_map["DAL"] = "a"; // three2one_map["DCY"] = "c"; // three2one_map["DAS"] = "d";// three2one_map["DGL"] = "e";// three2one_map["DPH"] = "f";// three2one_map["DGY"] = "g";// three2one_map["DHI"] = "h";// three2one_map["DIL"] = "i";// three2one_map["DLY"] = "k";// three2one_map["DLE"] = "l";// three2one_map["DME"] = "m";// three2one_map["DAN"] = "n";// three2one_map["DPR"] = "p";// three2one_map["DGN"] = "q";// three2one_map["DAR"] = "r";// three2one_map["DSE"] = "s";// three2one_map["DTH"] = "t";// three2one_map["DVA"] = "v";// three2one_map["DTR"] = "w";// three2one_map["DTY"] = "y";// return 0; } string four2three(string damin){ if(damin=="DALA") return "DAL"; if(damin=="DCYS") return "DCY"; if(damin=="DASP") return "DAS"; if(damin=="DGLU") return "DGL"; if(damin=="DPHE") return "DPH"; if(damin=="DGLY") return "DGY"; if(damin=="DHIS") return "DHI"; if(damin=="DILE") return "DIL"; if(damin=="DLYS") return "DLY"; if(damin=="DLEU") return "DLE"; if(damin=="DMET") return "DME"; if(damin=="DASN") return "DAN"; if(damin=="DPRO") return "DPR"; if(damin=="DGLN") return "DGN"; if(damin=="DARG") return "DAR"; if(damin=="DSER") return "DSE"; if(damin=="DTHR") return "DTH"; if(damin=="DVAL") return "DVA"; if(damin=="DTRP") return "DTR"; if(damin=="DTYR") return "DTY"; else return "XXX"; } static int init = init_three2one(); string three2one(string aa){ if(three2one_map.size()==0) init = init_three2one(); map::iterator pos = three2one_map.find(aa); if(pos == three2one_map.end()){ return "X"; } else return pos->second; } string one2three(string aa){ if(three2one_map.size()==0) init = init_three2one(); map::iterator iter = three2one_map.begin(), end = three2one_map.end(); while(iter!=end){ if(iter->second==aa) return iter->first; ++iter; } return "UKN"; } string one2three(char aa){ string s; s+=aa; return one2three(s);} int one2index(string aa){ if(aa=="A") return 0; if(aa=="B") return 1; if(aa=="C") return 2; if(aa=="D") return 3; if(aa=="E") return 4; if(aa=="F") return 5; if(aa=="G") return 6; if(aa=="H") return 7; if(aa=="I") return 8; if(aa=="K") return 9; if(aa=="L") return 10; if(aa=="M") return 11; if(aa=="N") return 12; if(aa=="P") return 13; if(aa=="Q") return 14; if(aa=="R") return 15; if(aa=="S") return 16; if(aa=="T") return 17; if(aa=="V") return 18; if(aa=="W") return 19; if(aa=="Y") return 20; if(aa=="Z") return 21; return 99; } vector read_fasta(string file){ ifstream fasta; fasta.open(file.c_str()); if(!fasta){ throw "Unable to read fasta file!"; } vector res; string current; string line; while(getline(fasta,line)){ if(line[0]=='>'){ if(current.size()){ res.push_back(current); current = ""; } continue; } else { for(size_t i=0;i fasta2three(string s, bool frm_long){ vector res; for(int i=0;i seq = seg.sequence(1); for(size_t i=0;i::iterator pos = three2one_map.find(seq[i]); if(pos==three2one_map.end()){ // cout<<"DEBUG NOT AN AMPINOACID "< X,Y,Z; vector n; for(int i = 0; i< seg.size();i++){ const PDBFragment & f = seg[i]; for(int a = 0;a9){ // aout<<"\t>>CHAIN BREAK BETWEEN "< X,Y,Z; vector n; vector frag; for(int i = 0; i< seg.size();i++){ const PDBFragment & f = seg[i]; for(int a = 0;a9){ // aout<<"\t>>CHAIN BREAK BETWEEN "<fragment.push_back(new PDBFragment(seg[frag[i]])); mod.segment.push_back(n_seg); n_seg = new PDBSegment; } else{ n_seg->fragment.push_back(new PDBFragment(seg[frag[i]])); } } n_seg->fragment.push_back(new PDBFragment(seg[frag[s]])); mod.segment.push_back(n_seg); return mod; } }