// NOTE(review): this copy of the file looks damaged. The original line breaks
// are gone and text that sat between '<' and '>' appears to be missing: the
// #include directives carry no header names, `vector dr;` and `vector sp;`
// have no element type, and several statements break off right after a '<'.
// With the code collapsed onto single lines, each `//` comment inside it also
// swallows everything that follows it on that line. Restore the file from
// version control before attempting any code change.
//
// What the surviving text shows:
//  - class SwissProt stores parsed records in the member `sp`; a record is an
//    SpEntry with the fields id, acc, data_class, length, gn, os, oc, ox, rx,
//    dr, seq_desc and seq.
//  - parse(file) inspects the last characters of the file name: a name ending
//    in "gz" sets comp = 1, a name ending in "bz2" sets comp = 2. With
//    comp == 0 the file is opened through an ifstream and a
//    "FATAL ERROR: no such file: " message is started when the open fails.
//    The rest of parse() is not visible here.
//    NOTE(review): file[len-1] and file[len-2] are read without checking the
//    length of the name, so a one- or two-character name indexes before the
//    start of the string.
//  - read(in) loops while the stream is good, reads one line per iteration
//    and dispatches on its first two characters (ID, AC, GN, OS, OC, OX, RX,
//    DR, SQ) to the matching *_line helper. A line whose keyword compares
//    equal to the blank-keyword literal is appended to seq from offset 5, but
//    only when seq_desc is non-empty. A "//" line pushes the current entry
//    onto `sp` and resets it to a fresh SpEntry.
//    NOTE(review): keyword is built from l.begin()+2 without checking
//    l.size(); a line shorter than two characters, including the empty string
//    left by a failed getline at end of input, would read past the end.
//  - id_line (header not visible) splits the line on spaces and stores tok[1]
//    in id, sets data_class to whether tok[2] equals "Reviewed;", and stores
//    atoi(tok[3]) in length. ac_line stores tok[1] without its last character
//    in acc (presumably dropping a trailing ';' -- TODO confirm).
//    NOTE(review): neither checks tok.size() before indexing.
//  - One code path, presumably create_db (its header is not visible), splits
//    each dr entry on spaces, pads the token list to five items with "-" and
//    starts an "INSERT INTO spx VALUES(" statement in a stringstream.
//    NOTE(review): the SQL text is assembled by string concatenation; confirm
//    whether the values are escaped in the part that is missing.
//  - The enum values REVIEWD / UNREVIEWD are not used in the visible text and
//    look misspelled (REVIEWED / UNREVIEWED?).
//  - to_string(const SwissProt &) returns an empty string.
//  - init_swissprot() creates Module("swissprot") and registers the methods
//    "parse" and "create_db" on a class named "swissprot".
/*************************************************************************** swissprot_mod.cpp - description ------------------- begin : Thu Jan 11 10:09:01 2007 copyright : (C) 2002 by Cavalli Andrea email : cavalli@bioc.unizh.ch **************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace Almost; class SwissProt { //Data enum { REVIEWD, UNREVIEWD }; struct SpEntry { string id; string acc; bool data_class; int length; string gn; string os; string oc; string ox; string rx; vector dr; string seq_desc; string seq; }; vector sp; public: SwissProt(){}; int parse(string file){ int comp = 0; int len = file.size()-1; if(file[len]=='z'&&file[len-1]=='g') comp =1; if(file[len]=='2'&&file[len-1]=='z'&&file[len-2]=='b') comp=2; if(comp==0){ ifstream in; in.open(file.c_str()); if(!in){ cout<<"FATAL ERROR: no such file: "< tok; boost::split(tok,sp[i].dr[j],boost::is_any_of(" ")); while(tok.size()<5) tok.push_back("-"); stringstream c; c <<"INSERT INTO spx VALUES("; c <<"'"< tok; boost::split(tok,l_local,boost::is_any_of(" ")); sp_entry.id = tok[1]; if(tok[2]=="Reviewed;") sp_entry.data_class = true; else sp_entry.data_class = false; sp_entry.length = atoi(tok[3].c_str()); } void ac_line(SpEntry & sp_entry,const string &l){ string l_local; int size = l.size(); l_local.push_back(l[0]); int last = 0; for(int i=1;i tok; boost::split(tok,l_local,boost::is_any_of(" ")); sp_entry.acc = 
string(tok[1].begin(),tok[1].end()-1); } void read(istream &in){ int count = 0; SpEntry sp_entry; while(in){ string l; getline(in,l,'\n'); string keyword = string(l.begin(),l.begin()+2); //DT DE OG OH skipped. Only RX in cit. CC KW FT skipped if(keyword=="ID"){ id_line(sp_entry,l); } else if(keyword=="AC"){ ac_line(sp_entry,l); } else if(keyword=="GN"){ gn_line(sp_entry,l); } else if(keyword=="OS"){ os_line(sp_entry,l); } else if(keyword=="OC"){ oc_line(sp_entry,l); } else if(keyword=="OX"){ ox_line(sp_entry,l); } else if(keyword=="RX"){ rx_line(sp_entry,l); } else if(keyword=="DR"){ dr_line(sp_entry,l); } else if(keyword=="SQ"){ sq_line(sp_entry,l); } else if(keyword==" "){ if(sp_entry.seq_desc.size()!=0){ sp_entry.seq += string(l.begin()+5,l.end()); } } else if(keyword=="//"){ sp.push_back(sp_entry); sp_entry = SpEntry(); cout<<++count<<" "<"< inline string to_string(const SwissProt &){ return ""; } extern "C" { void init_swissprot(){ //declarations here Module mod = Module("swissprot"); Class(mod.self(),"swissprot") .def_method("parse",&SwissProt::parse) .def_method("create_db",&SwissProt::create_db) ; } }