1 package Maasha::Stockholm;
3 # Copyright (C) 2006 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of the Stockholm format.
26 # http://www.cgb.ki.se/cgb/groups/sonnhammer/Stockholm.html
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
36 use vars qw ( @ISA @EXPORT );
38 @ISA = qw( Exporter );
41 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
44 sub get_stockholm_entry
46 # Martin A. Hansen, February 2007.
48 # Given a file handle, returns the next stockholm
49 # entry as a list of lines.
51 my ( $fh, # file handle
58 while ( defined $fh and $line = <$fh> )
64 last if $line eq "//";
70 return wantarray ? @lines : \@lines;
75 sub parse_stockholm_entry
77 # Martin A. Hansen, February 2007.
79 # given a Stockholm entry as a list of lines,
80 # parses this into an elaborate data structure.
81 # Compultory fields: AC ID DE AU SE SS BM GA TC NC TP SQ
82 # Non-compultory fields: PI DC DR RC RN RM RT RA RL CC
84 my ( $entry, # stockholm entry
87 # returns data structure
89 my ( $line, %hash, %align_hash, @align_list, @align );
91 foreach $line ( @{ $entry } )
93 next if $line =~ /^# /;
95 if ( $line =~ /^#=GF\s+([^\s]+)\s+(.*)$/ )
97 push @{ $hash{ "GF" }{ $1 } }, $2;
99 elsif ( $line =~ /^#=GC\s+([^\s]+)\s+(.*)$/ )
101 push @{ $hash{ "GC" }{ $1 } }, $2;
103 elsif ( $line =~ /^#=GS\s+([^\s]+)\s+([^\s]+)\s+(.*)$/ )
105 push @{ $hash{ "GS" }{ $1 }{ $2 } }, $3;
107 elsif ( $line =~ /^#=GR\s+([^\s]+)\s+([^\s]+)\s+(.*)$/ )
109 push @{ $hash{ "GR" }{ $1 }{ $2 } }, $3;
111 elsif ( $line =~ /^([^\s]+)\s+(.+)$/ )
113 push @align_list, $1 if not exists $align_hash{ $1 };
115 $align_hash{ $1 } .= $2;
119 map { $hash{ "GF" }{ $_ } = join " ", @{ $hash{ "GF" }{ $_ } } } keys %{ $hash{ "GF" } };
120 map { $hash{ "GC" }{ $_ } = join "", @{ $hash{ "GC" }{ $_ } } } keys %{ $hash{ "GC" } };
121 map { push @align, [ $_, $align_hash{ $_ } ] } @align_list;
123 push @align, [ "SS_cons", $hash{ "GC" }{ "SS_cons" } ];
124 push @align, [ "RF", $hash{ "GC" }{ "RF" } ] if $hash{ "GC" }{ "RF" };
126 delete $hash{ "GC" };
128 $hash{ "ALIGN" } = \@align;
130 return wantarray ? %hash : \%hash;
134 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<