1 package Maasha::KISS::IO;
3 # Copyright (C) 2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for parsing and emitting KISS records.
27 # http://code.google.com/p/biopieces/wiki/KissFormat
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40 use vars qw( @ISA @EXPORT );
42 @ISA = qw( Exporter );
60 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
65 my ( $fh, # file handle
70 my ( $line, @fields, %entry );
72 while ( $line = <$fh> )
76 next if $line =~ /^$|^#/;
78 @fields = split /\t/, $line;
80 Maasha::Common::error( qq( BAD kiss entry: $line) ) if not @fields == 12;
82 $entry{ 'S_ID' } = $fields[ S_ID ];
83 $entry{ 'S_BEG' } = $fields[ S_BEG ];
84 $entry{ 'S_END' } = $fields[ S_END ];
85 $entry{ 'Q_ID' } = $fields[ Q_ID ];
86 $entry{ 'SCORE' } = $fields[ SCORE ];
87 $entry{ 'STRAND' } = $fields[ STRAND ];
88 $entry{ 'HITS' } = $fields[ HITS ];
89 $entry{ 'ALIGN' } = $fields[ ALIGN ];
90 $entry{ 'BLOCK_COUNT' } = $fields[ BLOCK_COUNT ];
91 $entry{ 'BLOCK_BEGS' } = $fields[ BLOCK_BEGS ];
92 $entry{ 'BLOCK_LENS' } = $fields[ BLOCK_LENS ];
93 $entry{ 'BLOCK_TYPE' } = $fields[ BLOCK_TYPE ];
95 return wantarray ? %entry : \%entry;
102 my ( $entry, # KISS entry to output
103 $fh, # file handle - OPTIONAL
112 $fields[ S_ID ] = $entry->{ 'S_ID' };
113 $fields[ S_BEG ] = $entry->{ 'S_BEG' };
114 $fields[ S_END ] = $entry->{ 'S_END' };
115 $fields[ Q_ID ] = $entry->{ 'Q_ID' };
116 $fields[ SCORE ] = $entry->{ 'SCORE' };
117 $fields[ STRAND ] = $entry->{ 'STRAND' };
118 $fields[ HITS ] = $entry->{ 'HITS' };
119 $fields[ ALIGN ] = $entry->{ 'ALIGN' };
120 $fields[ BLOCK_COUNT ] = $entry->{ 'BLOCK_COUNT' };
121 $fields[ BLOCK_BEGS ] = $entry->{ 'BLOCK_BEGS' };
122 $fields[ BLOCK_LENS ] = $entry->{ 'BLOCK_LENS' };
123 $fields[ BLOCK_TYPE ] = $entry->{ 'BLOCK_TYPE' };
125 print $fh join( "\t", @fields ), "\n";
131 my ( $dbh, # Database handle
133 $s_beg, # Subject begin
134 $s_end, # Subject end
137 my ( $sql, $entries );
139 # $sql = "SELECT * FROM $table WHERE S_BEG >= $s_beg AND S_END <= $s_end ORDER BY S_BEG,S_END";
140 $sql = "SELECT S_BEG,S_END,Q_ID,ALIGN FROM $table WHERE S_BEG >= $s_beg AND S_END <= $s_end";
142 $entries = Maasha::SQL::query_hashref_list( $dbh, $sql );
144 return wantarray ? @{ $entries } : $entries;
150 # Martin A, Hansen, October 2009.
152 # Creates an index of a sorted KISS file that
153 # allowing the location of the byte position
154 # from where records can be read given a
155 # specific S_BEG position. The index consists of
156 # triples: [ beg, end, bytepos ], where beg and
157 # end denotes the interval where the next KISS
158 # record begins at bytepos.
160 my ( $fh, # filehandle to KISS file
165 my ( $line, @fields, $beg, $end, $pos, @index );
170 while ( $line = <$fh> )
174 @fields = split /\t/, $line, 3;
176 $end = $fields[ S_BEG ];
180 push @index, [ $beg, $end, $pos ];
183 elsif ( $end > $beg )
185 push @index, [ $beg, $end - 1, $pos ];
190 Maasha::Common::error( qq(KISS file not sorted: $end < $beg) );
193 $pos += 1 + length $line;
196 return wantarray ? @index : \@index;
206 Maasha::Filesys::file_store( $path, $index );
210 sub kiss_index_retrieve
217 $index = Maasha::Filesys::file_retrieve( $path );
219 return wantarray ? @{ $index } : $index;
223 sub kiss_index_search
231 my ( $high, $low, $try );
234 $high = scalar @{ $index };
236 while ( $low <= $high )
238 $try = int( ( $high + $low ) / 2 );
240 if ( $num < $index->[ $try ]->[ 0 ] ) {
242 } elsif ( $num > $index->[ $try ]->[ 1 ] ) {
245 return $index->[ $try ]->[ 2 ];
249 Maasha::Common::error( "Could not find number->$num in index" );
260 my ( $index, $offset, $fh, $entry, @entries );
262 $index = Maasha::KISS::IO::kiss_index_retrieve( "$file.index" );
264 $offset = Maasha::KISS::IO::kiss_index_search( $index, $beg );
266 $fh = Maasha::Filesys::file_read_open( $file );
268 sysseek( $fh, $offset, 0 );
270 while ( $entry = kiss_entry_get( $fh ) )
272 push @entries, $entry;
274 last if $entry->{ 'S_END' } > $end;
279 return wantarray ? @entries : \@entries;
285 # S.aur_COL 41 75 5_vnlh2ywXsN1/1 1 - . 17:A>T,31:A>N . . . .
287 my ( $s_seqref, # subject sequence reference
293 my ( $align, $pos, $s_symbol, $q_symbol, $s_seq, $q_seq, $insertions );
295 $s_seq = substr ${ $s_seqref }, $entry->{ 'S_BEG' }, $entry->{ 'S_END' } - $entry->{ 'S_BEG' } + 1;
300 foreach $align ( split /,/, $entry->{ 'ALIGN' } )
302 if ( $align =~ /(\d+):(.)>(.)/ )
308 if ( $s_symbol eq '-' ) # insertion
310 substr $s_seq, $pos + $insertions, 0, $s_symbol;
311 substr $q_seq, $pos + $insertions, 0, $q_symbol;
315 elsif ( $q_symbol eq '-' ) # deletion
317 substr $q_seq, $pos + $insertions, 1, $q_symbol;
321 substr $q_seq, $pos + $insertions, 1, $q_symbol;
326 Maasha::Common::error( qq(Bad alignment descriptor: "$align") );
330 Maasha::Align::align_print_pairwise( [ $entry->{ 'S_ID' }, $s_seq ], [ $entry->{ 'Q_ID' }, $q_seq ] );
336 my ( $entry, # KISS entry
339 return wantarray ? %{ $entry } : $entry;
345 my ( $record, # Biopiece record
348 $record->{ 'HITS' } ||= ".";
349 $record->{ 'BLOCK_COUNT' } ||= ".";
350 $record->{ 'BLOCK_BEGS' } ||= ".";
351 $record->{ 'BLOCK_LENS' } ||= ".";
352 $record->{ 'BLOCK_TYPE' } ||= ".";
353 $record->{ 'ALIGN' } ||= $record->{ 'DESCRIPTOR' } || ".";
355 return wantarray ? %{ $record } : $record;
359 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<