3 # Copyright (C) 2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines to parse ACE format.
28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
39 use vars qw ( @ISA @EXPORT );
42 @ISA = qw( Exporter );
45 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
50 # Martin A. Hansen, December 2009.
52 # Fetches the next contig from an ACE file.
57 my ( $block, $line, @lines, $records );
61 while ( $block = <$fh> )
65 next if $block =~ /^AS/;
69 @lines = split "\n", "CO $block";
71 $records = ace_entry_parse( \@lines );
73 return wantarray ? @{ $records } : $records;
81 # Martin A. Hansen, December 2009.
83 # Parses an ACE entry given as a list of lines and
84 # returns a list of hashref.
86 my ( $entry, # list of lines
91 my ( $i, $contig, $reads, $read_name, $read_seq, $record, @records );
95 while ( $i < scalar @{ $entry } )
97 if ( $entry->[ $i ] =~ /^CO ([^ ]+) (\d+) (\d+) (\d+) (U|C)/ )
99 $contig->{ 'CONTIG_NAME' } = $1;
100 $contig->{ 'CONTIG_BASES' } = $2;
101 $contig->{ 'CONTIG_READS' } = $3;
102 $contig->{ 'CONTIG_SEGMENT' } = $4;
103 $contig->{ 'CONTIG_COMPLEMENT' } = $5;
104 $contig->{ 'CONTIG_SEQ' } = "";
108 while ( $entry->[ $i ] ne "" )
110 $contig->{ 'CONTIG_SEQ' } .= $entry->[ $i ];
115 elsif ( $entry->[ $i ] =~ /^AF ([^ ]+) (U|C) (-?\d+)/ )
117 $reads->{ $1 }->{ 'AF_READ_COMPLEMENT' } = $2;
118 $reads->{ $1 }->{ 'AF_READ_BEG' } = ( $3 - 1 );
120 elsif ( $entry->[ $i ] =~ /^BS (\d+) (\d+) (.+)/ )
124 elsif ( $entry->[ $i ] =~ /^RD ([^ ]+) (\d+) (\d+) (\d+)/ )
128 $reads->{ $read_name }->{ 'RD_READ_LEN' } = $2;
129 $reads->{ $read_name }->{ 'RD_READ_ITEMS' } = $3;
130 $reads->{ $read_name }->{ 'RD_READ_TAGS' } = $4;
136 while ( $entry->[ $i ] ne "" )
138 $read_seq .= $entry->[ $i ];
143 $reads->{ $read_name }->{ 'RD_READ_SEQ' } = $read_seq;
145 elsif ( $entry->[ $i ] =~ /^QA (\d+) (\d+) (\d+) (\d+)/ )
147 $reads->{ $read_name }->{ 'QA_CLIP_BEG' } = ( $1 - 1 );
148 $reads->{ $read_name }->{ 'QA_CLIP_END' } = ( $2 - 1 );
149 $reads->{ $read_name }->{ 'QA_ALIGN_BEG' } = ( $3 - 1 );
150 $reads->{ $read_name }->{ 'QA_ALIGN_END' } = ( $4 - 1 );
156 my ( $read, $s_seq, $q_seq, $len, $align );
158 foreach $read ( keys %{ $reads } )
160 $len = $reads->{ $read }->{ 'QA_ALIGN_END' } - $reads->{ $read }->{ 'QA_ALIGN_BEG' } + 1;
161 $s_seq = substr $contig->{ 'CONTIG_SEQ' }, $reads->{ $read }->{ 'AF_READ_BEG' } + $reads->{ $read }->{ 'QA_ALIGN_BEG' }, $len;
162 $q_seq = substr $reads->{ $read }->{ 'RD_READ_SEQ' }, $reads->{ $read }->{ 'QA_ALIGN_BEG' }, $len;
167 if ( $reads->{ $read }->{ 'AF_READ_COMPLEMENT' } eq 'C' )
169 $record->{ 'STRAND' } = '-';
171 Maasha::Seq::dna_comp( \$s_seq );
172 Maasha::Seq::dna_comp( \$q_seq );
176 $record->{ 'STRAND' } = '+';
179 # print Dumper( $read, $reads->{ $read } );
181 $align = Maasha::KISS::kiss_align_enc( \$s_seq, \$q_seq, $reads->{ $read }->{ 'QA_ALIGN_BEG' } );
183 $record->{ 'S_ID' } = $contig->{ 'CONTIG_NAME' };
184 $record->{ 'S_BEG' } = $reads->{ $read }->{ 'AF_READ_BEG' };
185 $record->{ 'S_END' } = $record->{ 'S_BEG' } + $len - 1; # FIXME
186 $record->{ 'Q_ID' } = $read;
187 $record->{ 'Q_ID' } =~ s/\..+//;
188 $record->{ 'ALIGN' } = join ",", @{ $align };
193 print "S_SEQ: $s_seq\n";
194 print "Q_SEQ: $q_seq\n\n";
195 print "READ: $read\n";
196 print Dumper( $reads->{ $read } );
197 print Dumper( $record );
200 if ( $record->{ 'S_BEG' } >= 0 ) # FIXME
202 push @records, Storable::dclone $record;
206 return wantarray ? @records : \@records;
210 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<