# -*- perl -*- ########################################################################### # # Copyright (C) 1997-2015 Nigel P. Brown # # (i) License # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # (ii) Contacts # # Project Admin: Nigel P. Brown # Email: biomview@gmail.com # Project URL: http://bio-mview.sourceforge.net # # (iii) Citation # # Please acknowledge use of this Program by citing the following reference in # any published work including web-sites: # # Brown, N.P., Leroy C., Sander C. (1998) MView: A Web compatible database # search or multiple alignment viewer. Bioinformatics. 14(4):380-381. # # and provide a link to the MView project URL given above under 'Contacts'. # ########################################################################### # $Id: HSSP.pm,v 1.26 2015/06/14 17:09:04 npb Exp $ ########################################################################### # # HSSP parsing consists of 5 levels of record: # # HEADER # Crudely presents each simple subrecord as a field of the same name, # except the HSSP line which is split for version numbers. # # PROTEIN # The ranking of proteins against the test sequence: # # 'ranking' is an array of records, one per line of input ordered by NR # with corresponding field names. # # get_record() method returns this array, or just the supplied record # numbers counting from 1. # # ALIGNMENT # The structural information about the test sequence and the alignments: # # 'structure' is an array ordered by SeqNo holding the structural data # for the test sequences. This data repeats every '## ALIGNMENT' block, # so is only read once: # # get_chains() returns a list of chain names. # # get_record() returns a list of records for the structure fields (the # alignment portion is not returned), or just the supplied record # numbers counting from 1. # # get_query() returns the test sequence string or just that of the # specified chain. # # get_dssp() returns the DSSP assignment or just that of the specified # chain. # # 'alignment' is an array ordered by SeqNo holding the total alignment # data over all '## ALIGNMENT' blocks: # # get_sequence() returns the sequence string for the n'th sequence # counting from 1 or just that of the specified chain. # # PROFILE # Subrecord parser not implemented. # # INSERTION # Subrecord parser not implemented. # ########################################################################### package NPB::Parse::Format::HSSP; use vars qw(@ISA); use strict; @ISA = qw(NPB::Parse::Record); #delimit full HSSP entry my $HSSP_START = '^HSSP'; my $HSSP_END = '^\/\/'; #HSSP record types my $HSSP_HEADER = $HSSP_START; my $HSSP_HEADERend = '^## PROTEINS'; my $HSSP_PROTEIN = '^## PROTEINS'; my $HSSP_PROTEINend = '^## ALIGNMENTS'; my $HSSP_ALIGNMENT = '^## ALIGNMENTS'; my $HSSP_ALIGNMENTend = '^## SEQUENCE'; my $HSSP_PROFILE = '^## SEQUENCE'; my $HSSP_PROFILEend = '^## INSERTION'; my $HSSP_INSERTION = '^## INSERTION'; my $HSSP_INSERTIONend = $HSSP_END; my $HSSP_Null = '^\s*$';#' #Consume one entry-worth of input on text stream associated with $file and #return a new HSSP instance. sub get_entry { my ($parent) = @_; my ($line, $offset, $bytes) = ('', -1, 0); while (defined ($line = $parent->{'text'}->getline)) { #start of entry if ($line =~ /$HSSP_START/o and $offset < 0) { $offset = $parent->{'text'}->startofline; next; } #end of entry if ($line =~ /$HSSP_END/o) { last; } } return 0 if $offset < 0; $bytes = $parent->{'text'}->tell - $offset; new NPB::Parse::Format::HSSP(undef, $parent->{'text'}, $offset, $bytes); } #Parse one entry sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); while (defined ($line = $text->next_line)) { #HEADER lines if ($line =~ /$HSSP_HEADER/o) { $text->scan_until($HSSP_HEADERend, 'HEADER'); next; } #consume data #PROTEIN lines if ($line =~ /$HSSP_PROTEIN/o) { $text->scan_until($HSSP_PROTEINend, 'PROTEIN'); next; } #ALIGNMENT lines if ($line =~ /$HSSP_ALIGNMENT/o) { $text->scan_until($HSSP_ALIGNMENTend, 'ALIGNMENT'); next; } #PROFILE lines if ($line =~ /$HSSP_PROFILE/o) { $text->scan_until($HSSP_PROFILEend, 'PROFILE'); next; } #INSERTION lines if ($line =~ /$HSSP_INSERTION/o) { $text->scan_until($HSSP_INSERTIONend, 'INSERTION'); next; } #blank line or empty record: ignore if ($line =~ /$HSSP_Null/o) { next; } #terminal line: ignore if ($line =~ /$HSSP_END/o) { next; } #default $self->warn("unknown field: $line"); } $self;#->examine; } ########################################################################### package NPB::Parse::Format::HSSP::HEADER; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); my $tmp; while (defined ($line = $text->next_line)) { if ($line =~ /^HSSP\s+(.*VERSION\s+(.*)\s*)/o) { $self->test_args($line, $1, $2); ( $self->{'full_version'}, $self->{'version'}, ) = ($1, $2); chomp $self->{'full_version'}; next; } if ($line =~ /^PDBID\s+(.*)/) { $self->test_args($line, $1); ( $self->{'pdbid'}, ) = ($1); next; } if ($line =~ /^DATE\s+file generated on\s+(\S+)/) { $self->test_args($line, $1); ( $self->{'date'}, ) = ($1); next; } if ($line =~ /^SEQBASE\s+(.*)/) { $self->test_args($line, $1); ( $self->{'seqbase'}, ) = ($1); next; } if ($line =~ /^PARAMETER/) { $line = $text->scan_while('^PARAMETER'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); chomp $line; $line = NPB::Parse::Record::strip_english_newlines($line); #fix typo in maxhom output $line =~ s/ :/: /g; #add fullstops and trim whitespace $line =~ s/([^:]+):\s*(\S+)\s*/$1: $2. /g; $self->{'parameter'} = $line; next; } if ($line =~ /^THRESHOLD\s+(.*)/) { $self->test_args($line, $1); ( $self->{'threshold'}, ) = ($1); next; } if ($line =~ /^REFERENCE\s+(.*)/) { $self->test_args($line, $1); ( $self->{'reference'}, ) = ($1); next; } if ($line =~ /^CONTACT\s+(.*)/) { $self->test_args($line, $1); ( $self->{'contact'}, ) = ($1); next; } if ($line =~ /^AVAILABLE/) { $line = $text->scan_while('^AVAILABLE'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); $self->{'available'} = NPB::Parse::Record::strip_english_newlines($line); next; } if ($line =~ /^HEADER/) { $line = $text->scan_while('^HEADER'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); $self->{'header'} = NPB::Parse::Record::strip_english_newlines($line); next; } if ($line =~ /^COMPND\s+(.*)/) { $line = $text->scan_while('^COMPND'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); $self->{'compnd'} = NPB::Parse::Record::strip_english_newlines($line); next; } if ($line =~ /^SOURCE\s+(.*)/) { $line = $text->scan_while('^SOURCE'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); $self->{'source'} = NPB::Parse::Record::strip_english_newlines($line); next; } if ($line =~ /^AUTHOR\s+(.*)/) { $line = $text->scan_while('^AUTHOR'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); $self->{'author'} = NPB::Parse::Record::strip_english_newlines($line); next; } if ($line =~ /^SEQLENGTH\s+(\d+)/) { $self->test_args($line, $1); ( $self->{'seqlength'}, ) = ($1); next; } if ($line =~ /^NCHAIN\s+(\d+)/) { $self->test_args($line, $1); ( $self->{'nchain'}, ) = ($1); next; } if ($line =~ /^KCHAIN\s+(\d+)\s+chain\(s\) used here\s*;\s*chain\(s\)\s*:\s*(.*)/) { $self->test_args($line, $1, $2); ( $self->{'kchain'}, $self->{'chainname'}, ) = ($1, [ split(/,\s*/, $2) ]); next; } if ($line =~ /^NALIGN\s+(\d+)/) { $self->test_args($line, $1); ( $self->{'nalign'}, ) = ($1); next; } if ($line =~ /^NOTATION\s+(.*)/) { $line = $text->scan_while('^NOTATION'); $tmp = new NPB::Parse::Record_Stream($self, 11, \$line); $line = $tmp->scan_lines(0); chomp $line; $self->{'notation'} = $line; next; } if ($line =~ /$HSSP_Null/) { next; } #default $self->warn("unknown field: $line"); } $self; } sub print_data { my ($self, $indent) = (@_, 0); my $x = ' ' x $indent; printf "$x%20s -> '%s'\n", 'version', $self->{'version'}; printf "$x%20s -> '%s'\n", 'full_version', $self->{'full_version'}; printf "$x%20s -> %s\n", 'pdbid', $self->{'pdbid'}; printf "$x%20s -> %s\n", 'date', $self->{'date'}; printf "$x%20s -> %s\n", 'seqbase', $self->{'seqbase'}; printf "$x%20s -> %s\n", 'parameter', $self->{'parameter'}; printf "$x%20s -> %s\n", 'threshold', $self->{'threshold'}; printf "$x%20s -> %s\n", 'reference', $self->{'reference'}; printf "$x%20s -> %s\n", 'contact', $self->{'contact'}; printf "$x%20s -> %s\n", 'available', $self->{'available'}; printf "$x%20s -> %s\n", 'header', $self->{'header'}; printf "$x%20s -> %s\n", 'compnd', $self->{'compnd'}; printf "$x%20s -> %s\n", 'source', $self->{'source'}; printf "$x%20s -> %s\n", 'author', $self->{'author'}; printf "$x%20s -> %s\n", 'seqlength', $self->{'seqlength'}; printf "$x%20s -> %s\n", 'nchain', $self->{'nchain'}; printf "$x%20s -> %s\n", 'kchain', $self->{'kchain'}; printf "$x%20s -> %s\n", 'chainname', "[" . join(',',@{$self->{'chainname'}}) ."]"; printf "$x%20s -> %s\n", 'nalign', $self->{'nalign'}; printf "$x%20s -> %s\n", 'notation', $self->{'notation'}; } ########################################################################### package NPB::Parse::Format::HSSP::PROTEIN; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); my ($cut, $part1, $part2, $data); #(## PROTEINS : EMBL/SWISSPROT ...) line $line = $text->next_line; #( NR. ID STRID %IDE ...) line $line = $text->next_line; $cut = index($line, '%IDE'); $self->{'ranking'} = []; while (defined ($line = $text->next_line)) { $part1 = substr($line, 0, $cut+1); $part2 = substr($line, $cut); $data = {}; if ($part1 =~ /^ \s* (\d+) #NR. \s*:\s* (\S+) #ID \s* (\S+)? #STRID /xo) { $self->test_args($line, $1, $2, "$3"); ( $data->{'nr'}, $data->{'id'}, $data->{'strid'}, ) = ($1, $2, $3 eq 0 ? '' : $3); if ($part2 =~ /^ \s* (\S+) #%IDE \s+ (\S+) #%WSIM \s+ (\S+) #IFIR \s+ (\S+) #ILAS \s+ (\S+) #JFIR \s+ (\S+) #JLAS \s+ (\S+) #LALI \s+ (\S+) #NGAP \s+ (\S+) #LGAP \s+ (\S+) #LSEQ2 \s\s (.{11}) #ACCNUM (.*)? #PROTEIN /xo) { $self->test_args($line, $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12); ( $data->{'%ide'}, $data->{'%wsim'}, $data->{'ifir'}, $data->{'ilas'}, $data->{'jfir'}, $data->{'jlas'}, $data->{'lali'}, $data->{'ngap'}, $data->{'lgap'}, $data->{'lseq2'}, $data->{'protein'}, ) = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$12); $data->{'accnum'} = NPB::Parse::Record::strip_trailing_space($11); } push @{$self->{'ranking'}}, $data; next; } #default $self->warn("unknown field: $line"); } $self; } sub print_data { my ($self, $indent) = (@_, 0); my $x = ' ' x $indent; for (my $i=0; $i<@{$self->{'ranking'}}; $i++) { printf "$x%20s -> %s\n", 'nr', $self->{'ranking'}->[$i]->{'nr'}; printf "$x%20s -> %s\n", 'id', $self->{'ranking'}->[$i]->{'id'}; printf "$x%20s -> %s\n", '%ide', $self->{'ranking'}->[$i]->{'%ide'}; printf "$x%20s -> %s\n", '%wsim', $self->{'ranking'}->[$i]->{'%wsim'}; printf "$x%20s -> %s\n", 'ifir', $self->{'ranking'}->[$i]->{'ifir'}; printf "$x%20s -> %s\n", 'ilas', $self->{'ranking'}->[$i]->{'ilas'}; printf "$x%20s -> %s\n", 'jfir', $self->{'ranking'}->[$i]->{'jfir'}; printf "$x%20s -> %s\n", 'jlas', $self->{'ranking'}->[$i]->{'jlas'}; printf "$x%20s -> %s\n", 'lali', $self->{'ranking'}->[$i]->{'lali'}; printf "$x%20s -> %s\n", 'ngap', $self->{'ranking'}->[$i]->{'ngap'}; printf "$x%20s -> %s\n", 'lgap', $self->{'ranking'}->[$i]->{'lgap'}; printf "$x%20s -> %s\n", 'lseq2', $self->{'ranking'}->[$i]->{'lseq2'}; printf "$x%20s -> %s\n", 'protein', $self->{'ranking'}->[$i]->{'protein'}; } } #returns an ordered array of rank records, or just those requested in the #order requested. record numbers are counted from 1. sub get_record { my $self = shift; my $i; my @tmp = (); if (@_) { foreach $i (@_) { if (defined $self->{'ranking'}->[$i]) { push @tmp, $self->{'ranking'}->[$i]; } } } else { for ($i=0; $i < @{$self->{'ranking'}}; $i++) { push @tmp, $self->{'ranking'}->[$i]; } } @tmp; } ########################################################################### package NPB::Parse::Format::HSSP::ALIGNMENT; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); my ($lo, $hi, $cut, $part2, $data, $chaincount, $chain1, $chain2, $i); #construct an array of query structural properties ordered by SeqNo $self->{'structure'} = []; #construct an array of alignments ordered by SeqNo $self->{'alignment'} = []; #hash of indices into 'structure' array, keyed by chainname $self->{'chain_hash'} = {}; #ordered list of chains $self->{'chain_list'} = []; #read first block of alignments #(## ALIGNMENTS 1 - 70) line $line = $text->next_line; ($lo, $hi) = $line =~ /^\#\#\s+ALIGNMENTS\s+(\d+)\s*-\s*(\d+)/; #( SeqNo PDBNo AA STRUCTURE BP1 BP2 ACC NOCC VAR ....:....1..) line $line = $text->next_line; $cut = index($line, '..'); ($chain1, $chaincount) = ('', 1); while (defined ($line = $text->next_line(1))) { if ($cut < length $line) { $part2 = substr($line, $cut); } else { #warn length($line), "($cut) $line\n"; $part2 = ' ' x ($hi-$lo+1); } $data = {}; #chain discontinuity if ($line =~ /^\s*(\d+)\s+!/) { ( $data->{'seqno'}, $data->{'pdbno'}, $data->{'aa'}, $data->{'structure'}, $data->{'bp1'}, $data->{'bp2'}, $data->{'acc'}, $data->{'nocc'}, $data->{'var'}, ) = ($1, '', '!', '!', 0, 0, 0, 0, 0); push @{$self->{'structure'}}, $data; for ($i=0; $i < $hi-$lo+1; $i++) { push @{$self->{'alignment'}->[$lo+$i]}, '!'; } next; } #process the query structural information if ($line =~ /^ \s* (\d+) #SeqNo \s+ (\d+\s.)? #PDBNo, optional, eg. '16 A' \s* (.) #AA \s\s #exactly 2 spaces (.........) #STRUCTURE (9 characters) (....) #BP1 (.....) #BP2 \s+ (\d+) #ACC \s+ (\d+) #NOCC \s+ (\d+) #VAR /xo) { if (defined $2) { $self->test_args($line, $1, $2, $3, $4, $5, $6, $7, $8, $9); } else { $self->test_args($line, $1, $3, $4, $5, $6, $7, $8, $9); } ( $data->{'seqno'}, $data->{'pdbno'}, $data->{'aa'}, $data->{'structure'}, $data->{'bp1'}, $data->{'bp2'}, $data->{'acc'}, $data->{'nocc'}, $data->{'var'}, ) = ($1, "$2", $3, $4, $5, $6, $7, $8, $9); if ($data->{'pdbno'} =~ /^\s*$/) { #missing PDBNo data $data->{'pdbno'} = '0 '; $chain2 = ' '; } else { #PDBNo is composite, eg., '16 A' $data->{'pdbno'} =~ /(.)$/; $chain2 = $1; } push @{$self->{'structure'}}, $data; if ($chain2 eq $chain1) { #extend this chain $self->{'chain_hash'}->{$chain2}->[1] = $data->{'seqno'}; } else { #begin new chain #warn "|$chain1,$chain2|\n"; $self->{'chain_hash'}->{$chain2}->[0] = $data->{'seqno'}; $self->{'chain_hash'}->{$chain2}->[1] = $data->{'seqno'}; push @{$self->{'chain_list'}}, $chain2; $chain1 = $chain2; $chaincount++; } # no point testing this, as maxhom miscalculates $hi sometimes. # # #process the alignments in $part2 # if (length $part2 > $hi-$lo+1) { # $self->warn("alignment columns exceed expected range: @{[length($part2)]} cf. @{[$hi-$lo+1]}"); # } $part2 .= ' ' x ($hi - $lo + 1 - length($part2)); $data = [ split '', $part2 ]; for ($i=0; $i < $hi-$lo+1; $i++) { push @{$self->{'alignment'}->[$lo+$i]}, $data->[$i]; } next; } last if $line =~ /$HSSP_ALIGNMENT/o; last if $line =~ /$HSSP_ALIGNMENTend/o; #default $self->warn("unknown field: $line"); } return $self unless $line =~ /$HSSP_ALIGNMENT/o; #read remaining blocks of alignments #(## ALIGNMENTS 71 - 140) line, etc ($lo, $hi) = $line =~ /^\#\#\s+ALIGNMENTS\s+(\d+)\s*-\s*(\d+)/; #( SeqNo PDBNo AA STRUCTURE BP1 BP2 ACC NOCC VAR ....:....1..) line $line = $text->next_line; $cut = index($line, '..'); while (defined ($line = $text->next_line(1))) { if ($cut < length $line) { $part2 = substr($line, $cut); } else { #warn length($line), "($cut) $line\n"; $part2 = ' ' x ($hi-$lo+1); } $data = {}; if ($line =~ /^ \s* (\d+) #SeqNo /xo) { # no point testing this, as maxhom miscalculates $hi sometimes. # # #process the alignments in $part2 # if (length $part2 > $hi-$lo+1) { # $self->warn("alignment columns exceed expected range: @{[length($part2)]} cf. @{[$hi-$lo+1]}"); # } $part2 .= ' ' x ($hi - $lo + 1 - length($part2)); $data = [ split '', $part2 ]; for ($i=0; $i < $hi-$lo+1; $i++) { push @{$self->{'alignment'}->[$lo+$i]}, $data->[$i]; } next; } #chain discontinuity if ($line =~ /^\s*(\d+)\s+!/) { for ($i=0; $i < $hi-$lo+1; $i++) { push @{$self->{'alignment'}->[$lo+$i]}, '!'; } next; } if ($line =~ /^\#\#\s+ALIGNMENTS\s+(\d+)\s*-\s*(\d+)/) { ($lo, $hi) = ($1, $2); #( SeqNo PDBNo AA STRUCTURE BP1 BP2 ACC NOCC VAR ....:....1..) line $line = $text->next_line; $cut = index($line, '..'); next; } last if $line =~ /$HSSP_ALIGNMENTend/o; #default $self->warn("unknown field: $line"); } $self; } sub print_data { my ($self, $indent) = (@_, 0); my $x = ' ' x $indent; printf "$x%20s -> '%s'\n", 'query', $self->get_query; printf "$x%20s -> '%s'\n", 'dssp', $self->get_dssp(); #structure data [position] foreach my $i ($self->get_record) { printf "$x %20s -> %s\n", 'seqno', $i->{'seqno'}; printf "$x %20s -> '%s'\n", 'pdbno', $i->{'pdbno'}; printf "$x %20s -> %s\n", 'aa', $i->{'aa'}; printf "$x %20s -> '%s'\n", 'structure', $i->{'structure'}; printf "$x %20s -> '%s'\n", 'bp1', $i->{'bp1'}; printf "$x %20s -> '%s'\n", 'bp2', $i->{'bp2'}; printf "$x %20s -> %s\n", 'acc', $i->{'acc'}; printf "$x %20s -> %s\n", 'nocc', $i->{'nocc'}; printf "$x %20s -> %s\n", 'var', $i->{'var'}; } #alignments[chain][rank] foreach my $j ($self->get_chains) { printf "$x%20s -> '%s'\n", "chain[$j]", $self->get_query($j); for (my $i=1; $i<@{$self->{'alignment'}}; $i++) { printf "$x%20s -> '%s'\n", "alignment[$j][$i]", $self->get_sequence($i, $j); } } } #returns list of chain names, in order of appearance in HSSP alignment sub get_chains { @{$_[0]->{'chain_list'}}; } #returns an ordered array of structure records, or just those requested in the #order requested. record numbers are counted from 1. sub get_record { my $self = shift; my $i; my @tmp = (); if (@_) { foreach $i (@_) { if (defined $self->{'structure'}->[$i]) { push @tmp, $self->{'structure'}->[$i]; } } } else { for ($i=0; $i < @{$self->{'structure'}}; $i++) { push @tmp, $self->{'structure'}->[$i]; } } @tmp; } #returns the query sequence string. if a chain name is given, returns only #that chain. sub get_query { my $self = shift; my ($s, $i, $lo, $hi) = (''); #chainname supplied? if (@_) { if (exists $self->{'chain_hash'}->{$_[0]}) { $lo = $self->{'chain_hash'}->{$_[0]}->[0]-1; $hi = $self->{'chain_hash'}->{$_[0]}->[1]; } else { $self->warn("unknown chain: '$_[0]'"); return ''; } } else { $lo = 0; $hi = scalar @{$self->{'structure'}}; } for ($i=$lo; $i < $hi; $i++) { $s .= $self->{'structure'}->[$i]->{'aa'}; } $s; } #returns the query sequence DSSP assignment string. if a chain name is #given, returns only that chain. sub get_dssp { my $self = shift; my ($s, $i, $lo, $hi) = (''); #chainname supplied? if (@_) { if (exists $self->{'chain_hash'}->{$_[0]}) { $lo = $self->{'chain_hash'}->{$_[0]}->[0]-1; $hi = $self->{'chain_hash'}->{$_[0]}->[1]; } else { $self->warn("unknown chain: '$_[0]'"); return ''; } } else { $lo = 0; $hi = scalar @{$self->{'structure'}}; } for ($i=$lo; $i < $hi; $i++) { $s .= substr($self->{'structure'}->[$i]->{'structure'}, 0, 1); } $s; } #returns the aligned sequence string for alignment N counting from 1. if a #chain name is given, returns only that chain. sub get_sequence { my ($self, $seqno) = (shift, shift); my ($s, $i, $lo, $hi) = (''); #chainname supplied? if (@_) { if (exists $self->{'chain_hash'}->{$_[0]}) { $lo = $self->{'chain_hash'}->{$_[0]}->[0]-1; $hi = $self->{'chain_hash'}->{$_[0]}->[1]; } else { $self->warn("unknown chain: '$_[0]'"); return ''; } } else { $lo = 0; $hi = scalar @{$self->{'structure'}}; } for ($i=$lo; $i < $hi; $i++) { my $c = $self->{'alignment'}->[$seqno]->[$i]; if ($c eq '!') { #discontinuity if ($i > 0 and $self->{'alignment'}->[$seqno]->[$i-1] eq ' ') { $c = ' '; } elsif ($i < $hi-1 and $self->{'alignment'}->[$seqno]->[$i+1] eq ' ') { $c = ' '; } } $s .= $c; } $s; } ########################################################################### package NPB::Parse::Format::HSSP::PROFILE; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { NPB::Message::warn(shift, "new() not implemented") } ########################################################################### package NPB::Parse::Format::HSSP::INSERTION; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { NPB::Message::warn(shift, "new() not implemented") } ########################################################################### 1;