# -*- perl -*- ########################################################################### # # Copyright (C) 1997-2015 Nigel P. Brown # # (i) License # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # (ii) Contacts # # Project Admin: Nigel P. Brown # Email: biomview@gmail.com # Project URL: http://bio-mview.sourceforge.net # # (iii) Citation # # Please acknowledge use of this Program by citing the following reference in # any published work including web-sites: # # Brown, N.P., Leroy C., Sander C. (1998) MView: A Web compatible database # search or multiple alignment viewer. Bioinformatics. 14(4):380-381. # # and provide a link to the MView project URL given above under 'Contacts'. # ########################################################################### # $Id: MULTAS.pm,v 1.10 2015/06/14 17:09:04 npb Exp $ ########################################################################### # # MULTAS parsing consists of repeat records of: # # BLOCK # LIST # ALIGNMENT # # ########################################################################### package NPB::Parse::Format::MULTAS; use vars qw(@ISA); use strict; @ISA = qw(NPB::Parse::Record); #delimit full MULTAS entry my $MULTAS_START = '^Block'; my $MULTAS_END = undef; my $MULTAS_Null = '^\s*$';#' #MULTAS record types my $MULTAS_BLOCK = $MULTAS_START; my $MULTAS_BLOCKend = "(?:$MULTAS_BLOCK|$MULTAS_Null)"; my $MULTAS_LIST = '^\s*\d+\s+seqs'; #my $MULTAS_LISTmid = "^(?:SEED|USER>>)"; my $MULTAS_LISTmid = "^(?:SEED|USER)"; my $MULTAS_LISTend = "(?:(?:>>){0}|$MULTAS_BLOCKend)"; my $MULTAS_ALIGNMENT = '(?:>>){0}'; my $MULTAS_ALIGNMENTend = $MULTAS_BLOCKend; #Consume one entry-worth of input on text stream associated with $file and #return a new MULTAS instance. sub get_entry { my ($parent) = @_; my ($line, $offset, $bytes) = ('', -1, 0); while (defined ($line = $parent->{'text'}->getline)) { #start of entry if ($line =~ /$MULTAS_START/o and $offset < 0) { $offset = $parent->{'text'}->startofline; next; } #end of entry #if ($line =~ /$MULTAS_END/o) { # last; #} } return 0 if $offset < 0; $bytes = $parent->{'text'}->tell - $offset; new NPB::Parse::Format::MULTAS(undef, $parent->{'text'}, $offset, $bytes); } #Parse one entry sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); while (defined ($line = $text->next_line)) { #BLOCK lines if ($line =~ /$MULTAS_BLOCK/o) { $text->scan_until($MULTAS_BLOCKend, 'BLOCK'); next; } #blank line or empty record: ignore if ($line =~ /$MULTAS_Null/o) { next; } #terminal line: ignore #if ($line =~ /$MULTAS_END/o) { # next; #} #default $self->warn("unknown field: $line"); } $self->test_records(qw(BLOCK)); $self;#->examine; } ########################################################################### package NPB::Parse::Format::MULTAS::BLOCK; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); while (defined ($line = $text->next_line)) { #BLOCK number if ($line =~ /$MULTAS_BLOCK\s+(\d+)/) { $self->{'number'} = $1; next; } #LIST lines if ($line =~ /$MULTAS_LIST/o) { $text->scan_while($MULTAS_LISTmid, 'LIST'); next; } # #LIST lines # if ($line =~ /$MULTAS_LIST/o) { # $text->scan_until($MULTAS_LISTend, 'LIST'); # next; # } #ALIGNMENT lines if ($line =~ /$MULTAS_ALIGNMENT/o) { $text->scan_until($MULTAS_ALIGNMENTend, 'ALIGNMENT'); next; } #blank line or empty record: ignore if ($line =~ /$MULTAS_Null/o) { next; } #default $self->warn("unknown field: $line"); } $self->test_records(qw(LIST ALIGNMENT)); $self;#->examine; } sub print_data { my ($self, $indent) = (@_, 0); my $x = ' ' x $indent; printf "$x%20s -> %d\n", 'number', $self->{'number'}; } ########################################################################### package NPB::Parse::Format::MULTAS::BLOCK::LIST; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); $self->{'count'} = 0; $self->{'hit'} = []; #ranked search hits while (defined ($line = $text->next_line)) { if ($line =~ /\s*(\d+)\s+seqs/) { $self->{'count'} = $1; next; } if ($line =~ /\s* ((?:SEED|USER)) #source of sequence #\s*>>\s* \s*>+\s* ([^=]+) #identifier \s*=\s* (.*) #description /xo) { $self->test_args($line, $1, $2, $3); push @{$self->{'hit'}}, { 'type' => $1, 'id' => $2, 'desc' => $3, }; #strip leading, trailing, internal white space $self->{'hit'}->[$#{$self->{'hit'}}]->{'id'} =~ s/\s//g; next; } #default $self->warn("unknown field: $line"); } if ($self->{'count'} != @{$self->{'hit'}}) { $self->warn("stated ($self->{'count'}) and parsed (@{[scalar @{$self->{'hit'}}]}) sequence count mismatch"); } $self; } sub print_data { my ($self, $indent) = (@_, 0); my $x = ' ' x $indent; printf "$x%20s -> %d\n", 'count', $self->{'count'}; foreach my $hit (@{$self->{'hit'}}) { foreach my $field (sort keys %$hit) { printf "$x%20s -> '%s'\n", $field, $hit->{$field}; } } } ########################################################################### package NPB::Parse::Format::MULTAS::BLOCK::ALIGNMENT; use vars qw(@ISA); @ISA = qw(NPB::Parse::Record); sub new { my $type = shift; if (@_ < 2) { #at least two args, ($offset, $bytes are optional). NPB::Message::die($type, "new() invalid arguments (@_)"); } my ($parent, $text, $offset, $bytes) = (@_, -1, -1); my ($self, $line, $record); $self = new NPB::Parse::Record($type, $parent, $text, $offset, $bytes); $text = new NPB::Parse::Record_Stream($self); my (@tmp, $i); $self->{'seq'} = []; $self->{'length'} = 0; $self->{'count'} = 0; while (defined ($line = $text->next_line)) { chomp $line; @tmp = split(//, $line); for ($i=0; $i<@tmp; $i++) { push @{$self->{'seq'}->[$i]}, $tmp[$i]; } } $self->{'length'} = @{$self->{'seq'}->[0]}; $self->{'count'} = @{$self->{'seq'}}; #check alignments all same length, then make string foreach $i (@{$self->{'seq'}}) { if (@$i != $self->{'length'}) { $self->warn("alignment lengths differ"); } $i = join('', @$i); } $self; } sub print_data { my ($self, $indent) = (@_, 0); my $x = ' ' x $indent; printf "$x%20s -> %d\n", 'count', $self->{'count'}; printf "$x%20s -> %d\n", 'length', $self->{'length'}; for (my $i=0; $i<@{$self->{'seq'}}; $i++) { printf("$x%20s[%d] -> '%s'\n", 'seq', $i+1, $self->{'seq'}->[$i]); } } ########################################################################### 1;