/*************************************************************************** topos.cpp - description ------------------- begin : Tue Nov 18 01:45:35 2003 copyright : (C) 1999-2006 by Cavalli Andrea author : : cavalli $ date : : 2003/12/09 11:10:33 $ id : : coor.h,v 1.1.2.2 2003/12/09 11:10:33 cavalli Exp $ email : cavalli@bioc.unizh.ch amc82@cam.ac.uk **************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include namespace Almost { namespace Topos { double T = 1.5; double kpos = 0; double ks = 1; //double zero[6] = { 0.51, 0.51, 4.56, 2.49, 2.01, 2.02}; double zero[6] = { 0, 0, 0,0,0,0}; double k[6][3] = { { 14.66, 17.54, 15.25}, { 15, 15, 15}, { 0.16, 0.18, 0.20}, { 0.72, 0.99, 0.72}, { 0.76, 0.91, 0.70}, { 1.15, 1.21, 1.04} }; double ko[3] = { 0.74, 1.48, 0.74}; double kk = 1; double k_homo[20][20] = { { 0, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 3, 1, 2, 2, 2, 2 }, { 1, 0, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 1, 3, 1, 2, 1, 1, 2 }, { 1, 1, 0, 0, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2 }, { 1, 1, 0, 0, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2 }, { 1, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2 }, { 1, 1, 1, 1, 1, 0, 1, 2, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2 }, { 1, 1, 1, 1, 1, 1, 0, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, 2 }, { 2, 2, 2, 2, 2, 2, 2, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, { 1, 1, 1, 1, 1, 1, 1, 3, 0, 2, 1, 2, 2, 1, 3, 2, 2, 1, 1, 2 }, { 2, 2, 2, 2, 2, 2, 2, 3, 2, 0, 1, 2, 2, 2, 3, 2, 1, 2, 2, 0 }, { 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 0, 1, 1, 1, 3, 2, 2, 1, 1, 2 }, { 1, 0, 1, 1, 1, 1, 2, 3, 2, 2, 1, 0, 1, 2, 3, 1, 2, 2, 2, 2 }, { 1, 1, 1, 1, 1, 1, 1, 3, 2, 2, 1, 1, 0, 2, 3, 1, 2, 2, 2, 2 }, { 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 2, 2, 0, 3, 2, 2, 0, 0, 1 }, { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3 }, { 1, 1, 1, 1, 1, 1, 1, 3, 2, 2, 2, 1, 1, 2, 3, 0, 1, 2, 2, 2 }, { 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 2, 2, 3, 1, 0, 1, 1, 1 }, { 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 2, 2, 0, 3, 2, 1, 0, 0, 1 }, { 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 2, 2, 0, 3, 2, 1, 0, 0, 1 }, { 2, 2, 2, 2, 2, 2, 2, 3, 2, 0, 2, 2, 2, 1, 3, 2, 1, 1, 1, 0 } }; //double k_dpmf =0; vector > matx; vector Ph,Pb,Pc; vector res_data; vector res_data_triple; vector res_triple_db; void read_blast_mtx(ifstream & in){ istream_iterator iter(in),end; int size = atoi(iter->c_str());++iter; cout<<"Sequence "<<*iter<<"\n";++iter; for(int i=0;i<12;i++){ cout<<*iter<<"\n";++iter; } matx.resize(size); char *ncbicodes = "XAXCDEFGHIKLMNPQRSTVWXYXXX"; for(int i=0;ic_str());++iter; d = -d/100; matx[i].push_back(d); } } } } void read_prob(istream & prob){ istream_iterator iter(prob),end; while(iter!=end){ ++iter; double w; w = atof(iter->c_str()); ++iter; if(w<0.0001) w = 10000; else { w = -log(w); } Ph.push_back(w); w = atof(iter->c_str()); ++iter; if(w<0.0001) w = 10000; else { w = -log(w); } Pb.push_back(w); w = atof(iter->c_str()); ++iter; if(w<0.0001) w = 10000; else { w = -log(w); } Pc.push_back(w); } } int toind(char c){ if(c=='A') return 0; if(c=='R') return 1; if(c=='N') return 2; if(c=='D') return 3; if(c=='C') return 4; if(c=='Q') return 5; if(c=='E') return 6; if(c=='G') return 7; if(c=='H') return 8; if(c=='I') return 9; if(c=='L') return 10; if(c=='K') return 11; if(c=='M') return 12; if(c=='F') return 13; if(c=='P') return 14; if(c=='S') return 15; if(c=='T') return 16; if(c=='W') return 17; if(c=='Y') return 18; if(c=='V') return 19; return -1; } int toinda(char c){ if(c=='A') return 0; if(c=='C') return 1; if(c=='D') return 2; if(c=='E') return 3; if(c=='F') return 4; if(c=='G') return 5; if(c=='H') return 6; if(c=='I') return 7; if(c=='K') return 8; if(c=='L') return 9; if(c=='M') return 10; if(c=='N') return 11; if(c=='P') return 12; if(c=='Q') return 13; if(c=='R') return 14; if(c=='S') return 15; if(c=='T') return 16; if(c=='V') return 17; if(c=='W') return 18; if(c=='Y') return 19; return -1; } void read_database(string name){ ifstream in; in.open(name.c_str()); if(!in){ cout<<"Can't read database\n"; exit(1); } int size; in.read((char*)&size,sizeof(int)); cout<<"Reading "< iter(in),end; while(iter!=end){ ResidueData rd; strcpy(rd.prot, name.c_str()); rd.num = atoi((*iter).c_str()); ++iter; rd.name = (*iter)[0]; ++iter; rd.phi = atof((*iter).c_str()); ++iter; rd.psi = atof((*iter).c_str()); ++iter; rd.ome = atof((*iter).c_str()); ++iter; rd.ha = atof((*iter).c_str()); ++iter; rd.h = atof((*iter).c_str()); ++iter; rd.n = atof((*iter).c_str()); ++iter; rd.ca = atof((*iter).c_str()); ++iter; rd.cb = atof((*iter).c_str()); ++iter; rd.c = atof((*iter).c_str()); ++iter; rd.ss = (*iter)[0]; ++iter; res_data.push_back(rd); } cout<<"Read protein "< iter(in), end; int size = atoi(iter->c_str()); ++iter; cout<<"Reading "<c_str()); ++iter; } tok = (*iter); ++iter; strcpy(dt.name,tok.c_str()); for(int q = 0;q<3;q++){ dt.ss[q] = iter->c_str()[0]; ++iter; } for(int q = 0;q<3;q++){ dt.phi[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.psi[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.ome[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.ha[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.h[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.n[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.ca[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.cb[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.c[q] = atof(iter->c_str()); ++iter; } res_triple_db.push_back(dt); } cout<<"Read "< iter(in), end; int size = atoi(iter->c_str()); ++iter; // cout<<"Reading "<c_str()); ++iter; } tok = (*iter); ++iter; strcpy(dt.name,tok.c_str()); for(int q = 0;q<3;q++){ dt.ss[q] = iter->c_str()[0]; ++iter; } for(int q = 0;q<3;q++){ dt.phi[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.psi[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.ome[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.ha[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.h[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.n[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.ca[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.cb[q] = atof(iter->c_str()); ++iter; } for(int q = 0;q<3;q++){ dt.c[q] = atof(iter->c_str()); ++iter; } res_triple_db.push_back(dt); } obs.on_db_read(size,size); // cout<<"Read "<180) d1 = d1-180; double d2 = abs(psi1-psi2); if(d2>180) d2 = d2-180; // cout<<"DEBUG "<< phi1<<" "< cluster(vector & phi, vector & psi, vector &scr){ pair phi_psi; vector > proxy; proxy.resize(phi.size()); for(int i=0;i0){ double pp = 0,qq=0, s = 0; for(int i=0;i> Cluster "<> Cluster Best Score "<n){ p=i; n=proxy[i][i]; } } double pp = 0,qq=0; for(int i=0;i2){ cout<<"\n\t>> CLUSTER "<> Cluster "< &b, double S,pair &data){ double Stot = 0; double phi = 0; double psi = 0; double dphi = 0; double dpsi = 0; for(int i=0;i1){ dphi = sqrt(dphi/Stot - phi/Stot*phi/Stot); dpsi = sqrt(dpsi/Stot - psi/Stot*psi/Stot); // cout< top; vector > clusters; vector phi; vector psi; vector scr; clusters.push_back(vector()); map res; set scores; for(int j=0;j::iterator iter = scores.begin(),end = scores.end(); int count = 0; int n = 0; double PHI = 0; double PSI = 0; double DPHI = 0; double DPSI = 0; double Stot2 = 0; while((iter!=end)&&(count<500)&&(n<500)){ ++n; PHI += res[*iter].phi[1]*exp(-(*iter)/T); PSI += res[*iter].psi[1]*exp(-(*iter)/T); DPHI += ( res[*iter].phi[1])*( res[*iter].phi[1]*exp(-(*iter)/T)); DPSI += ( res[*iter].psi[1])*( res[*iter].psi[1]*exp(-(*iter)/T)); Stot2 +=exp(-(*iter)/T); top.push_back(res[*iter]); double S = 10000; for(int l = 0;l90){ //Was 50 phi.push_back(res[*iter].phi[1]); psi.push_back(res[*iter].psi[1]); scr.push_back(*iter); clusters.push_back(vector()); clusters.back().push_back(res[*iter]); ++iter; } else{ ++iter; } ++count; } if(n>0){ PHI /= Stot2; PSI /= Stot2; DPHI = sqrt(DPHI/Stot2-PHI*PHI); DPSI = sqrt(DPSI/Stot2-PSI*PSI); map rank; vector > val; for(int l =0;l tmp; double w = cluster_weight(res_data_triple[i],clusters[l],Stot2,tmp); if(w>0.00005){ val.push_back(tmp); rank[w] = l; } } log<::reverse_iterator iter = rank.rbegin(), end = rank.rend(); int count = 0; while(iter!=end){ int l = iter->second; log<<"\tCluster "<first; ++iter; log<<"\n"; if(count<3){ rest < top; vector > clusters; vector phi; vector psi; vector scr; clusters.push_back(vector()); map res; set scores; for(int j=0;j::iterator iter = scores.begin(),end = scores.end(); int count = 0; int n = 0; double PHI = 0; double PSI = 0; double DPHI = 0; double DPSI = 0; double Stot2 = 0; while((iter!=end)&&(count<500)&&(n<500)){ ++n; PHI += res[*iter].phi[1]*exp(-(*iter)/T); PSI += res[*iter].psi[1]*exp(-(*iter)/T); DPHI += ( res[*iter].phi[1])*( res[*iter].phi[1]*exp(-(*iter)/T)); DPSI += ( res[*iter].psi[1])*( res[*iter].psi[1]*exp(-(*iter)/T)); Stot2 +=exp(-(*iter)/T); top.push_back(res[*iter]); double S = 10000; for(int l = 0;l90){ //Was 50 phi.push_back(res[*iter].phi[1]); psi.push_back(res[*iter].psi[1]); scr.push_back(*iter); clusters.push_back(vector()); clusters.back().push_back(res[*iter]); ++iter; } else{ ++iter; } ++count; } if(n>0){ PHI /= Stot2; PSI /= Stot2; DPHI = sqrt(DPHI/Stot2-PHI*PHI); DPSI = sqrt(DPSI/Stot2-PSI*PSI); map rank; vector > val; for(int l =0;l tmp; double w = cluster_weight(res_data_triple[i],clusters[l],Stot2,tmp); if(w>0.00005){ val.push_back(tmp); rank[w] = l; } } log<::reverse_iterator iter = rank.rbegin(), end = rank.rend(); int count = 0; while(iter!=end){ int l = iter->second; log<<"\tCluster "<first; ++iter; log<<"\n"; if(count<3){ rest < iter(in),end; vector res; vector res_triple; while(iter!=end){ ResidueData rd; strcpy(rd.prot, name.c_str()); rd.num = atoi((*iter).c_str()); ++iter; rd.name = toupper((*iter)[0]); ++iter; rd.phi = atof((*iter).c_str()); ++iter; rd.psi = atof((*iter).c_str()); ++iter; rd.ome = atof((*iter).c_str()); ++iter; rd.ha = atof((*iter).c_str()); ++iter; rd.h = atof((*iter).c_str()); ++iter; rd.n = atof((*iter).c_str()); ++iter; rd.ca = atof((*iter).c_str()); ++iter; rd.cb = atof((*iter).c_str()); ++iter; rd.c = atof((*iter).c_str()); ++iter; rd.ss = (*iter)[0]; ++iter; res.push_back(rd); } cout<<"Read protein "< & database(){ return res_triple_db; } //End topos } } // Local Variables: // mode:C++ // End: