1 package Maasha::Stockholm;
3 # Copyright (C) 2006 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of the Stockholm format.
26 # http://www.cgb.ki.se/cgb/groups/sonnhammer/Stockholm.html
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
35 use vars qw ( @ISA @EXPORT );
37 @ISA = qw( Exporter );
40 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
43 sub get_stockholm_entry
45 # Martin A. Hansen, February 2007.
47 # Given a file handle, returns the next stockholm
48 # entry as a list of lines.
50 my ( $fh, # file handle
57 while ( defined $fh and $line = <$fh> )
63 last if $line eq "//";
69 return wantarray ? @lines : \@lines;
74 sub parse_stockholm_entry
76 # Martin A. Hansen, February 2007.
78 # given a Stockholm entry as a list of lines,
79 # parses this into an elaborate data structure.
80 # Compultory fields: AC ID DE AU SE SS BM GA TC NC TP SQ
81 # Non-compultory fields: PI DC DR RC RN RM RT RA RL CC
83 my ( $entry, # stockholm entry
86 # returns data structure
88 my ( $line, %hash, %align_hash, @align_list, @align );
90 foreach $line ( @{ $entry } )
92 next if $line =~ /^# /;
94 if ( $line =~ /^#=GF\s+([^\s]+)\s+(.*)$/ )
96 push @{ $hash{ "GF" }{ $1 } }, $2;
98 elsif ( $line =~ /^#=GC\s+([^\s]+)\s+(.*)$/ )
100 push @{ $hash{ "GC" }{ $1 } }, $2;
102 elsif ( $line =~ /^#=GS\s+([^\s]+)\s+([^\s]+)\s+(.*)$/ )
104 push @{ $hash{ "GS" }{ $1 }{ $2 } }, $3;
106 elsif ( $line =~ /^#=GR\s+([^\s]+)\s+([^\s]+)\s+(.*)$/ )
108 push @{ $hash{ "GR" }{ $1 }{ $2 } }, $3;
110 elsif ( $line =~ /^([^\s]+)\s+(.+)$/ )
112 push @align_list, $1 if not exists $align_hash{ $1 };
114 $align_hash{ $1 } .= $2;
118 map { $hash{ "GF" }{ $_ } = join " ", @{ $hash{ "GF" }{ $_ } } } keys %{ $hash{ "GF" } };
119 map { $hash{ "GC" }{ $_ } = join "", @{ $hash{ "GC" }{ $_ } } } keys %{ $hash{ "GC" } };
120 map { push @align, [ $_, $align_hash{ $_ } ] } @align_list;
122 push @align, [ "SS_cons", $hash{ "GC" }{ "SS_cons" } ];
123 push @align, [ "RF", $hash{ "GC" }{ "RF" } ] if $hash{ "GC" }{ "RF" };
125 delete $hash{ "GC" };
127 $hash{ "ALIGN" } = \@align;
129 return wantarray ? %hash : \%hash;
133 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<