#include "GDB.h"

// mfr2rosetta: converts an MFR fragment table into ROSETTA fragment records
// printed on stdout. Progress and diagnostics go to stderr.
//
// NOTE(review): the unqualified names used below (cerr, string, map, ifstream,
// printf, atoi, ...) are assumed to be brought into scope by GDB.h, as in the
// original file -- confirm.

/**
 * Print the command-line usage text to stderr and terminate with status 0.
 * This function does not return.
 */
void printSyntax()
{
    cerr << "Convert MFR fragment table to ROSETTA fragments: " << endl;
    cerr << "File Name Settings: " << endl;
    cerr << " -mfr MFR table." << endl;
    cerr << " -segLength fragment length." << endl;
    cerr << " -angleSSDir direcotory for the idelized angles and secondary structure table files for proteins in MFR database." << endl;
    cerr << "Examples:" << endl;
    cerr << " mfr2rosetta -mfr mfr.tab -angleSSDir ./ANGLESS -segLength 9" << endl ;
    cerr << " Convert 9-residues MFR fragments with filename 'mfr.tab' to ROSETTA format, " << endl;
    cerr << " the converted ROSETTA fragments will be printed to the screen" << endl ;
    exit(0);
}

/**
 * Format a floating point value into a caller-supplied buffer.
 *
 * @param n     value to format
 * @param buff  destination buffer; the caller must make it large enough,
 *              no bounds checking is performed here
 * @param f     printf conversion character, one of f F e E g G; any other
 *              character falls back to 'f'
 * @param prec  precision; values above 99 are clamped to 99, negative values
 *              omit the precision from the format entirely
 * @return buff
 */
char * ftoa( float n, char *buff, char f='g', int prec=6 )
{
    if ( !(f=='f' || f=='F' || f=='e' || f=='E' || f=='g' || f=='G') )
    {
        f = 'f';
    }

    // Build the format string, e.g. "%.6lg". At most 7 characters are written.
    char format[20];
    char *fs = format;
    *fs++ = '%';
    if ( prec >= 0 )
    {
        if ( prec > 99 )   // keep the precision to two digits
            prec = 99;
        *fs++ = '.';
        if ( prec >= 10 )
        {
            *fs++ = prec / 10 + '0';
            *fs++ = prec % 10 + '0';
        }
        else
        {
            *fs++ = prec + '0';
        }
    }
    *fs++ = 'l';
    *fs++ = f;
    *fs = '\0';

    sprintf( buff, format, n );
    return buff;
}

/**
 * Write the decimal representation of n into buff and return buff.
 *
 * @param n     value to format
 * @param buff  destination buffer; must be large enough for the digits,
 *              an optional sign and the terminating NUL
 * @param base  accepted for signature compatibility but ignored: the output
 *              is always decimal
 */
char * itoa( int n, char *buff, int base=10 )
{
    (void)base;   // always decimal, see above
    sprintf(buff, "%d", n);
    return buff;
}

/**
 * Copy str into buff with the ASCII letters 'A'..'Z' converted to lower case.
 *
 * @param str   input text
 * @param buff  destination buffer; must hold at least str.length()+1 chars
 * @return buff (NUL terminated)
 */
char * tolower( string str, char *buff )
{
    int len = str.length();
    for(int i = 0; i < len; i++)
    {
        char c = str[i];
        if( c >= 'A' && c <= 'Z' )
            buff[i] = c + 32;   // 'a' - 'A' == 32 in ASCII
        else
            buff[i] = c;
    }
    buff[len] = '\0';
    return buff;
}

/**
 * Entry point.
 *
 * Options (each followed by exactly one value):
 *   -mfr <file>        MFR table, loaded with GDB::loadGDB
 *   -segLength <n>     fragment length, must be positive
 *   -angleSSDir <dir>  directory holding the per-protein "<name>.tab" angle /
 *                      secondary structure tables; defaults to the
 *                      ANGLESS_DIR environment variable, else "./ANGLESS"
 *
 * Running without arguments, with an unknown option, or with an option that
 * lacks its value prints the usage text and exits with status 0. Errors found
 * in the input data are reported on stderr and exit with status 1.
 */
int main( int argc, char ** argv )
{
    // NOTE(review): the container types below are inferred from how the
    // entries are used (it->second["KEY"] yields a string) -- confirm against
    // the declaration of GDB::Entries in GDB.h.
    typedef map< int, map<string, string> >::iterator EntryIter;
    typedef map< string, int >::iterator CountIter;

    if( argc == 1 )
    {
        printSyntax();
    }

    // Choose the path separator by looking at PATH. getenv may return NULL,
    // and constructing a string from NULL is undefined behavior.
    string slash_char = "/";
    const char *path_env = getenv( "PATH" );
    if( path_env != NULL )
    {
        string path_str = path_env;
        if( path_str.find("/") != string::npos )
            slash_char = "/";    // unix
        else if( path_str.find("\\") != string::npos )
            slash_char = "\\";   // Windows
    }

    string angleSSDir = "./ANGLESS";
    const char *angle_env = getenv( "ANGLESS_DIR" );
    if( angle_env != NULL )
        angleSSDir = angle_env;

    // Walk argv as option/value pairs. Joining the arguments and splitting
    // them again on '-' would break any value that itself contains a hyphen
    // (e.g. "-mfr my-run/mfr.tab").
    GDB db;
    int len = 0;
    for( int i = 1; i < argc; i++ )
    {
        string opt = argv[i];
        if( opt.size() < 2 || opt[0] != '-' || i + 1 >= argc )
        {
            printSyntax();   // does not return
        }
        else
        {
            string name = opt.substr(1);
            string arg = argv[++i];

            if( name == "mfr" )
                db.loadGDB(arg);
            else if( name == "angleSSDir" )
                angleSSDir = arg;
            else if( name == "segLength" )
                len = atoi( arg.c_str() );
            else
                printSyntax();
        }
    }

    if( len <= 0 )
    {
        cerr << "Fragment length '-segLength' need to be specified" << endl;
        exit(1);
    }

    char buf[100];
    int tag = 2;
    float zero = 0.0;

    // name_his: number of fragments per PDB_NAME.
    // cnt_his:  number of fragment candidates per D_RES1 value.
    map<string, int> name_his, cnt_his;
    for( EntryIter it = db.Entries.begin(); it != db.Entries.end(); it++ )
    {
        string PDB_NAME = it->second["PDB_NAME"];
        name_his[PDB_NAME]++;
        cnt_his[ it->second["D_RES1"] ]++;
    }

    if( cnt_his.empty() )
    {
        cerr << "\t***ERROR:: No fragments found in MFR table " << endl;
        exit(1);
    }

    // The keys are strings, so the map order is lexicographic ("10" < "2").
    // Determine the first and last residue numerically instead.
    int res0 = atoi( (cnt_his.begin()->first).c_str() );
    int resN = res0;
    for( CountIter itN = cnt_his.begin(); itN != cnt_his.end(); itN++ )
    {
        int resID = atoi( (itN->first).c_str() );
        if( resID < res0 ) res0 = resID;
        if( resID > resN ) resN = resID;
    }

    cerr << "Checking MFR Fragments Candidates ... " << endl;
    bool checked = true;
    if( res0 != 1 )
    {
        cerr << "\t***ERROR:: Fragments not start from residue 1 " << endl;
        checked = false;
    }

    for( int i = res0; i <= resN; i++ )
    {
        string key = itoa(i, buf);
        CountIter found = cnt_his.find(key);
        int count = ( found == cnt_his.end() ) ? 0 : found->second;

        if( count == 0 )
        {
            cerr << "\tERROR:: No fragment candidates for target fragment " << i << "-" << i+len-1 << endl;
            checked = false;
        }
        // NOTE(review): 200 appears to be the expected number of candidates
        // per position; any other count is flagged with "***" -- confirm.
        else if( count == 200 )
            cerr << "\t" << count << " fragments candidates for target fragment " << i << "-" << i+len-1 << endl;
        else
            cerr << "\t" << count << " fragments candidates for target fragment " << i << "-" << i+len-1 << " *** \n";
    }

    if( !checked )
    {
        cerr << "ERRORS found in MFR fragment candidates, exit ... " << endl;
        exit(1);
    }

    // Per protein, per residue id: the PHI / PSI / OMEGA / SS column values,
    // kept as the strings read from the angle tables.
    map< string, map<int, string> > pdb_phi, pdb_psi, pdb_omega;
    map< string, map<int, string> > pdb_ss;

    cerr << "Collecting structural information ... " ;
    for( CountIter itN = name_his.begin(); itN != name_his.end(); itN++ )
    {
        string PDB_NAME = itN->first;
        // The table file is named after the first five characters of PDB_NAME.
        string fname0 = angleSSDir + slash_char + PDB_NAME.substr(0,5) + ".tab";

        // Opened only to check that the file exists before GDB loads it.
        ifstream iangle_file( fname0.c_str() );
        if( iangle_file.is_open() )
        {
            GDB angle_tab;
            angle_tab.loadGDB(fname0);
            for( EntryIter it = angle_tab.Entries.begin(); it != angle_tab.Entries.end(); it++ )
            {
                int resID = atoi( it->second["RESID"].c_str() );
                pdb_phi[PDB_NAME][resID]   = it->second["PHI"];
                pdb_psi[PDB_NAME][resID]   = it->second["PSI"];
                pdb_omega[PDB_NAME][resID] = it->second["OMEGA"];
                pdb_ss[PDB_NAME][resID]    = it->second["SS"];
            }
        }
        else
        {
            cerr << "Angle Table " << fname0 << " not Found! Exit... \n";
            exit(1);
        }
    }

    cerr << "\t Done (" << name_his.size() << ")! \nCreating Rosetta fragments ..." ;

    // NOTE(review): the spacing inside the two printf format strings below is
    // reproduced exactly as found; confirm it against the column layout the
    // ROSETTA fragment reader expects.
    for( EntryIter it = db.Entries.begin(); it != db.Entries.end(); it++ )
    {
        string PDB_NAME = it->second["PDB_NAME"];
        string temp0 = PDB_NAME.substr(0,4);
        string PDB4 = tolower(temp0, buf);

        // The fifth character is the chain id; "0" is written as "_".
        // Names shorter than four characters would make substr(4,1) throw.
        string CHAIN = ( PDB_NAME.size() >= 4 ) ? PDB_NAME.substr(4,1) : string();
        if( CHAIN == "0" ) CHAIN = "_";

        string SEQ = it->second["SEQ"];
        int resID_1 = atoi( it->second["RES1"].c_str() );
        int RANK = atoi( it->second["RANK"].c_str() );
        int INDEX = atoi( it->second["D_RES1"].c_str() );

        if( RANK == 1 )   // print a title for each set of fragments
            printf(" position: %3d neighbors: %3d\n\n", INDEX, cnt_his[ it->second["D_RES1"] ]);

        for( int i = 1; i <= len; i++ )
        {
            int resID = resID_1 + i - 1;
            // A SEQ field shorter than the fragment yields an empty residue
            // letter instead of an out_of_range exception.
            string sequ = ( i - 1 < (int)SEQ.size() ) ? SEQ.substr(i-1,1) : string();

            printf(" %4s %1s %5d %1s %1s%9s%9s%9s%9.3f%9.3f %9.3f%2d %9.3f P%3d F%3d\n",
                   PDB4.c_str(), CHAIN.c_str(), resID, sequ.c_str(),
                   pdb_ss[PDB_NAME][resID].c_str(),
                   pdb_phi[PDB_NAME][resID].c_str(),
                   pdb_psi[PDB_NAME][resID].c_str(),
                   pdb_omega[PDB_NAME][resID].c_str(),
                   zero, zero, zero, tag, zero, INDEX, RANK);
        }
        cout << endl;
    }

    cerr << "\tDone! \n" ;
}