#include #include #include using namespace iotbx::pdb::hierarchy; namespace phaser { namespace ncs { // Helper methods bool is_valid_residue_group(const residue_group& rg) { return 0 < rg.atom_groups().size(); } // // class ReferenceChain // // Constructor ReferenceChain::ReferenceChain(const chain& ch, const MacromoleculeType& mmt) : chain_( ch ), mmt_( mmt ) { } // Destructor ReferenceChain::~ReferenceChain() { } // Accessors const MacromoleculeType& ReferenceChain::get_macromolecule_type() const { return mmt_; } const chain& ReferenceChain::get_chain() const { return chain_; } // Alignment methods std::pair< std::string, long > ReferenceChain::determine_numbering_shift(const chain& other) const { std::string ref_seq = mmt_.residue_group_sequence( chain_.residue_groups() ); std::string other_seq = mmt_.residue_group_sequence( other.residue_groups() ); LCSTree tree; tree.insert_word( ref_seq ); tree.insert_word( other_seq ); std::string longest_substring = tree.get_longest_common_substring(); size_t other_index = other_seq.find( longest_substring ); size_t ref_index = ref_seq.find( longest_substring ); PHASER_ASSERT( other_index != std::string::npos ); PHASER_ASSERT( ref_index != std::string::npos ); long numbering_shift = 0; std::map< long, unsigned int > count_for; const std::vector< residue_group >& other_rgs = other.residue_groups(); const std::vector< residue_group >& ref_rgs = chain_.residue_groups(); for ( size_t i = 0; i < longest_substring.size(); ++i ) { long nshift = other_rgs[ other_index + i ].resseq_as_int() - ref_rgs[ ref_index + i ].resseq_as_int(); count_for[ nshift ]++; } if ( 0 < count_for.size() ) { std::map< long, unsigned int >::const_iterator it = count_for.begin(); numbering_shift = it->first; unsigned int best_count = it->second; ++it; for ( ; it != count_for.end(); ++it ) { if ( best_count < it->second ) { best_count = it->second; numbering_shift = it->first; } } } return std::pair< std::string, long >( longest_substring, numbering_shift ); } // // class ChainAlignment // // Constructor ChainAlignment::ChainAlignment(const chain& ref, const chain& ali, long offset) : ref_chain_length_( ref.residue_groups().size() ), ali_chain_length_( ali.residue_groups().size() ) { std::map< std::string, residue_group > ref_frame_resid; // Offset ali residue numbers const std::vector< residue_group >& a = ali.residue_groups(); for ( std::vector< residue_group >::const_iterator it = a.begin(); it != a.end(); ++it ) { if ( !is_valid_residue_group( *it ) ) { continue; } std::ostringstream oss; oss << ( it->resseq_as_int() - offset ) << it->data->icode.elems; ref_frame_resid[ oss.str() ] = *it; } // Create alignment const std::vector< residue_group >& r = ref.residue_groups(); for ( std::vector< residue_group >::const_iterator it = r.begin(); it != r.end(); ++it ) { if ( !is_valid_residue_group( *it ) ) { continue; } std::ostringstream oss; oss << it->resseq_as_int() << it->data->icode.elems; std::string ref_resid( oss.str() ); if ( ref_frame_resid.find( ref_resid ) != ref_frame_resid.end() ) { alignment_.push_back( std::pair< residue_group, residue_group >( *it, ref_frame_resid[ ref_resid ] ) ); } } } // Destructor ChainAlignment::~ChainAlignment() { } // Information methods size_t ChainAlignment::get_alignment_length() const { return ref_chain_length_ + ali_chain_length_ - get_overlap_length(); } size_t ChainAlignment::get_overlap_length() const { return alignment_.size(); } float ChainAlignment::get_sequence_identity() const { size_t identical_count = 0; for ( aliIter it = alignment_.begin(); it != alignment_.end(); ++it ) { iotbx::pdb::str3 ref_name = it->first.atom_groups()[0].data->resname; iotbx::pdb::str3 ali_name = it->second.atom_groups()[0].data->resname; if ( ref_name == ali_name ) { ++identical_count; } } size_t shorter_length = std::min( ref_chain_length_, ali_chain_length_ ); if ( shorter_length == 0 ) { return 0; } return static_cast< float >( identical_count ) / shorter_length; } float ChainAlignment::get_sequence_coverage() const { return static_cast< float >( get_overlap_length() ) / get_alignment_length(); } const std::vector< std::pair< residue_group, residue_group > >& ChainAlignment::get_residue_group_alignment() const { return alignment_; } std::vector< std::pair< atom, atom > > ChainAlignment::get_atom_alignment() const { std::vector< std::pair< atom, atom > > atom_alignment; for ( aliIter alit = alignment_.begin(); alit != alignment_.end(); ++alit ) { std::map< iotbx::pdb::str4, atom > atom_for; scitbx::af::shared< atom > ali_atoms = alit->second.atoms(); for( scitbx::af::shared< atom >::const_iterator ait = ali_atoms.begin(); ait != ali_atoms.end(); ++ait ) { atom_for[ ait->data->name ] = *ait; } scitbx::af::shared< atom > ref_atoms = alit->first.atoms(); for( scitbx::af::shared< atom >::const_iterator ait = ref_atoms.begin(); ait != ref_atoms.end(); ++ait ) { iotbx::pdb::str4 name = ait->data->name; if ( atom_for.find( name ) != atom_for.end() ) { atom_alignment.push_back( std::pair< atom, atom >( *ait, atom_for[ name ] ) ); } } } return atom_alignment; } } } // namespace phaser::ncs