From: martinahansen Date: Thu, 26 Nov 2009 12:51:35 +0000 (+0000) Subject: cleaned KISS.pm X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=6f05eb1d7ba5c52a287ad0b03f030b4cc4ab96aa;p=biopieces.git cleaned KISS.pm git-svn-id: http://biopieces.googlecode.com/svn/trunk@778 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_perl/Maasha/KISS.pm b/code_perl/Maasha/KISS.pm index 2f30c67..290468a 100644 --- a/code_perl/Maasha/KISS.pm +++ b/code_perl/Maasha/KISS.pm @@ -64,6 +64,10 @@ use constant { sub kiss_entry_get { + # Martin A. Hansen, November 2009. + + # Gets the next KISS entry from a file handle. + my ( $fh, # file handle ) = @_; @@ -101,6 +105,11 @@ sub kiss_entry_get sub kiss_entry_put { + # Martin A. Hansen, November 2009. + + # Outputs a KISS record to a given filehandle + # or STDOUT if no filehandle is given. + my ( $entry, # KISS entry to output $fh, # file handle - OPTIONAL ) = @_; @@ -302,20 +311,32 @@ sub kiss_index_count sub kiss_index_store { - my ( $path, - $index, + # Martin A. Hansen, November 2009. + + # Stores a KISS index to a file. + + my ( $path, # path to KISS index + $index, # KISS index ) = @_; + # Returns nothing. + Maasha::Filesys::file_store( $path, $index ); } sub kiss_index_retrieve { - my ( $path, + # Martin A. Hansen, November 2009. + + # Retrieves a KISS index from a file. + + my ( $path, # Path to KISS index ) = @_; - my $index; + # Returns a data structure. + + my ( $index ); $index = Maasha::Filesys::file_retrieve( $path ); @@ -325,12 +346,20 @@ sub kiss_index_retrieve sub kiss_index_get_entries { - my ( $file, - $index, - $beg, - $end, + # Martin A. Hansen, November 2009. + + # Given a path to a KISS file and a KISS index + # along with a beg/end interval, locate all entries + # in that interval and return those. + + my ( $file, # path to KISS file + $index, # KISS index + $beg, # interval begin + $end, # interval end ) = @_; + # Returns a list. + my ( $offset, $fh, $entry, @entries ); $offset = kiss_index_offset( $index, $beg ); @@ -354,12 +383,20 @@ sub kiss_index_get_entries sub kiss_index_get_blocks { - my ( $index, - $beg, - $end, + # Martin A. Hansen, November 2009. + + # Given a KISS index recursively traverse + # this into the appropriate node size determined + # by the size of the given beg/end interval. + # Blocks consisting of hashes of BEG/END/COUNT + # are returned from the requested node size. + + my ( $index, # KISS index node + $beg, # interval begin + $end, # interval end $level, # index level - OPTIONAL $factor, # index factor - OPTIONAL - $size, + $size, # requested node size ) = @_; # Returns a list. @@ -412,6 +449,10 @@ sub kiss_index_get_blocks sub kiss_align { + # Martin A. Hansen, November 2009. + + # Test routine of correct resolve of ALIGN descriptors. + # S.aur_COL 41 75 5_vnlh2ywXsN1/1 1 - . 17:A>T,31:A>N . . . . my ( $s_seqref, # subject sequence reference @@ -463,18 +504,31 @@ sub kiss_align sub kiss2biopiece { + # Martin A. Hansen, November 2009. + + # Converts a KISS entry to a Biopiece record. + # TODO: Consistency checking + my ( $entry, # KISS entry ) = @_; + # Returns a hashref + return wantarray ? %{ $entry } : $entry; } sub biopiece2kiss { + # Martin A. Hansen, November 2009. + + # Converts a Biopiece record to a KISS entry. + my ( $record, # Biopiece record ) = @_; + # Returns a hashref + $record->{ 'SCORE' } ||= $record->{ 'E_VAL' } || "."; $record->{ 'HITS' } ||= "."; $record->{ 'BLOCK_COUNT' } ||= "."; @@ -491,87 +545,3 @@ sub biopiece2kiss 1; -__END__ - -sub kiss_index -{ - # Martin A, Hansen, October 2009. - - # Creates an index of a sorted KISS file that - # allowing the location of the byte position - # from where records can be read given a - # specific S_BEG position. The index consists of - # triples: [ beg, end, bytepos ], where beg and - # end denotes the interval where the next KISS - # record begins at bytepos. - - my ( $fh, # filehandle to KISS file - ) = @_; - - # Returns a list. - - my ( $line, @fields, $beg, $end, $pos, @index ); - - $beg = 0; - $pos = 0; - - while ( $line = <$fh> ) - { - chomp $line; - - @fields = split /\t/, $line, 3; - - $end = $fields[ S_BEG ]; - - if ( $end == 0 ) - { - push @index, [ $beg, $end, $pos ]; - $beg = 1; - } - elsif ( $end > $beg ) - { - push @index, [ $beg, $end - 1, $pos ]; - $beg = $end; - } - elsif ( $end < $beg ) - { - Maasha::Common::error( qq(KISS file not sorted: beg > end -> $beg > $end) ); - } - - $pos += 1 + length $line; - } - - return wantarray ? @index : \@index; -} - - -sub kiss_index_search -{ - my ( $index, - $num, - ) = @_; - - # Returns a number. - - my ( $high, $low, $try ); - - $low = 0; - $high = scalar @{ $index }; - - while ( $low <= $high ) - { - $try = int( ( $high + $low ) / 2 ); - - if ( $num < $index->[ $try ]->[ 0 ] ) { - $high = $try; - } elsif ( $num > $index->[ $try ]->[ 1 ] ) { - $low = $try + 1; - } else { - return $index->[ $try ]->[ 2 ]; - } - } - - Maasha::Common::error( "Could not find number->$num in index" ); -} - -