use warnings;
use strict;
use Data::Dumper;
+use JSON::XS;
use Maasha::Common;
use Maasha::Filesys;
use Maasha::Align;
+
use vars qw( @ISA @EXPORT );
@ISA = qw( Exporter );
INDEX_BLOCK_SIZE => 100,
INDEX_LEVEL => 100_000_000,
INDEX_FACTOR => 100,
+
+ BUCKET_SIZE => 100,
+ COUNT => 0,
+ OFFSET => 1,
+
+ INDEX_BEG => 1,
+ INDEX_END => 2,
+ INDEX => 12,
};
# Returns a list in list context, else a listref.
- my ( $line, @fields, %entry );
+ my ( $line, @fields );
while ( $line = <$fh> )
{
Maasha::Common::error( qq(BAD kiss entry: $line) ) if not @fields == 12;
- $entry{ 'S_ID' } = $fields[ S_ID ];
- $entry{ 'S_BEG' } = $fields[ S_BEG ];
- $entry{ 'S_END' } = $fields[ S_END ];
- $entry{ 'Q_ID' } = $fields[ Q_ID ];
- $entry{ 'SCORE' } = $fields[ SCORE ];
- $entry{ 'STRAND' } = $fields[ STRAND ];
- $entry{ 'HITS' } = $fields[ HITS ];
- $entry{ 'ALIGN' } = $fields[ ALIGN ];
- $entry{ 'BLOCK_COUNT' } = $fields[ BLOCK_COUNT ];
- $entry{ 'BLOCK_BEGS' } = $fields[ BLOCK_BEGS ];
- $entry{ 'BLOCK_LENS' } = $fields[ BLOCK_LENS ];
- $entry{ 'BLOCK_TYPE' } = $fields[ BLOCK_TYPE ];
-
- return wantarray ? %entry : \%entry;
+ return wantarray ? @fields : \@fields;
}
}
+sub kiss_retrieve
+{
+    # Martin A. Hansen, February 2010.
+
+    # Scans a sorted KISS file from the top and collects the entries
+    # that lie within an optional interval. Scanning stops at the
+    # first entry that begins after the interval end.
+    # NOTE(review): an entry ending exactly at the interval begin is
+    # excluded by the strict comparison below - confirm this is intended.
+
+    my ( $file,   # path to KISS file
+         $beg,    # interval begin - OPTIONAL
+         $end,    # interval end   - OPTIONAL
+       ) = @_;
+
+    # Returns a list in list context, else a listref.
+
+    my ( $kiss_fh, $hit, @hits );
+
+    # Missing (or false) bounds fall back to an open interval.
+    $beg = 0         if not $beg;
+    $end = 999999999 if not $end;
+
+    $kiss_fh = Maasha::Filesys::file_read_open( $file );
+
+    ENTRY:
+    while ( $hit = kiss_entry_get( $kiss_fh ) )
+    {
+        last ENTRY if $hit->[ S_BEG ] > $end;
+        next ENTRY unless $hit->[ S_END ] > $beg;
+
+        push @hits, $hit;
+    }
+
+    close $kiss_fh;
+
+    return \@hits unless wantarray;
+    return @hits;
+}
+
+
sub kiss_entry_parse
{
# Martin A. Hansen, December 2009.
Maasha::Common::error( qq(BAD kiss entry: $line) ) if not @fields == 12;
- $entry{ 'S_ID' } = $fields[ S_ID ];
- $entry{ 'S_BEG' } = $fields[ S_BEG ];
- $entry{ 'S_END' } = $fields[ S_END ];
- $entry{ 'Q_ID' } = $fields[ Q_ID ];
- $entry{ 'SCORE' } = $fields[ SCORE ];
- $entry{ 'STRAND' } = $fields[ STRAND ];
- $entry{ 'HITS' } = $fields[ HITS ];
- $entry{ 'ALIGN' } = $fields[ ALIGN ];
- $entry{ 'BLOCK_COUNT' } = $fields[ BLOCK_COUNT ];
- $entry{ 'BLOCK_BEGS' } = $fields[ BLOCK_BEGS ];
- $entry{ 'BLOCK_LENS' } = $fields[ BLOCK_LENS ];
- $entry{ 'BLOCK_TYPE' } = $fields[ BLOCK_TYPE ];
-
- return wantarray ? %entry : \%entry;
+ return wantarray ? @fields : \@fields;
}
# Returns nothing.
- my ( @fields );
+ Maasha::Common::error( qq(BAD kiss entry) ) if not scalar @{ $entry } == 12;
- if ( defined $entry->{ 'S_ID' } and
- defined $entry->{ 'S_BEG' } and
- defined $entry->{ 'S_END' }
+ if ( defined $entry->[ S_ID ] and
+ defined $entry->[ S_BEG ] and
+ defined $entry->[ S_END ]
)
{
- Maasha::Common::error( qq(Bad S_BEG value: $entry->{ 'S_BEG' } < 0 ) ) if $entry->{ 'S_BEG' } < 0;
- Maasha::Common::error( qq(Bad S_END value: $entry->{ 'S_END' } < $entry->{ 'S_BEG' }) ) if $entry->{ 'S_END' } < $entry->{ 'S_BEG' };
+ Maasha::Common::error( qq(Bad S_BEG value: $entry->[ S_BEG ] < 0 ) ) if $entry->[ S_BEG ] < 0;
+ Maasha::Common::error( qq(Bad S_END value: $entry->[ S_END ] < $entry->[ S_BEG ] ) ) if $entry->[ S_END ] < $entry->[ S_BEG ];
$fh ||= \*STDOUT;
- $fields[ S_ID ] = $entry->{ 'S_ID' };
- $fields[ S_BEG ] = $entry->{ 'S_BEG' };
- $fields[ S_END ] = $entry->{ 'S_END' };
- $fields[ Q_ID ] = $entry->{ 'Q_ID' } || ".";
- $fields[ SCORE ] = $entry->{ 'SCORE' } || ".";
- $fields[ STRAND ] = $entry->{ 'STRAND' } || ".";
- $fields[ HITS ] = $entry->{ 'HITS' } || ".";
- $fields[ ALIGN ] = $entry->{ 'ALIGN' } || ".";
- $fields[ BLOCK_COUNT ] = $entry->{ 'BLOCK_COUNT' } || ".";
- $fields[ BLOCK_BEGS ] = $entry->{ 'BLOCK_BEGS' } || ".";
- $fields[ BLOCK_LENS ] = $entry->{ 'BLOCK_LENS' } || ".";
- $fields[ BLOCK_TYPE ] = $entry->{ 'BLOCK_TYPE' } || ".";
-
- print $fh join( "\t", @fields ), "\n";
+ $entry->[ Q_ID ] = "." if not defined $entry->[ Q_ID ];
+ $entry->[ SCORE ] = "." if not defined $entry->[ SCORE ];
+ $entry->[ STRAND ] = "." if not defined $entry->[ STRAND ];
+ $entry->[ HITS ] = "." if not defined $entry->[ HITS ];
+ $entry->[ ALIGN ] = "." if not defined $entry->[ ALIGN ];
+ $entry->[ BLOCK_COUNT ] = "." if not defined $entry->[ BLOCK_COUNT ];
+ $entry->[ BLOCK_BEGS ] = "." if not defined $entry->[ BLOCK_BEGS ];
+ $entry->[ BLOCK_LENS ] = "." if not defined $entry->[ BLOCK_LENS ];
+ $entry->[ BLOCK_TYPE ] = "." if not defined $entry->[ BLOCK_TYPE ];
+
+ print $fh join( "\t", @{ $entry } ), "\n";
}
}
# Returns nothing.
- `sort -k 2,2n -k 3,3n $file > $file.sort`;
+ `sort -k 2,2n -k 3,3nr $file > $file.sort`;
rename "$file.sort", $file;
}
-sub kiss_intersect
+sub kiss_index
{
# Martin A. Hansen, December 2009.
- # Given filehandles to two different unsorted KISS files
- # intersect the entries so that all entries from file1 that
- # overlap entries in file2 are returned - unless the inverse flag
- # is given in which case entreis from file1 that does not
- # overlap any entries in file2 are returned.
+ # Creates a lookup index of a sorted KISS file.
- my ( $fh1, # filehandle to file1
- $fh2, # filehandle to file2
- $inverse, # flag indicating inverse matching - OPTIONAL
+ my ( $file, # path to KISS file
) = @_;
- # Returns a list
+ # Returns nothing.
- my ( $entry, %lookup, $pos, $overlap, @entries );
+ my ( $fh, $offset, $line, $s_beg, $bucket, $index );
- while ( $entry = kiss_entry_get( $fh2 ) ) {
- map { $lookup{ $_ } = 1 } ( $entry->{ 'S_BEG' } .. $entry->{ 'S_END' } );
- }
+ $fh = Maasha::Filesys::file_read_open( $file );
- while ( $entry = kiss_entry_get( $fh1 ) )
+ $offset = 0;
+
+ while ( $line = <$fh> )
{
- $overlap = 0;
+ ( undef, $s_beg ) = split "\t", $line, 3;
- foreach $pos ( $entry->{ 'S_BEG' } .. $entry->{ 'S_END' } )
- {
- if ( exists $lookup{ $pos } )
- {
- $overlap = 1;
+ $bucket = int( $s_beg / BUCKET_SIZE );
- last;
- }
- }
+ $index->[ $bucket ]->[ COUNT ]++;
+ $index->[ $bucket ]->[ OFFSET ] = $offset if not defined $index->[ $bucket ]->[ OFFSET ];
- if ( $overlap and not $inverse ) {
- push @entries, $entry;
- } elsif ( not $overlap and $inverse ) {
- push @entries, $entry;
- }
+ $offset += length $line;
}
- return wantarray ? @entries : \@entries;
+ close $fh;
+
+ Maasha::KISS::kiss_index_store( "$file.index", $index );
}
-sub kiss_index
+sub kiss_index_offset
{
- # Martin A, Hansen, November 2009.
+ # Martin A. Hansen, December 2009.
- # Creates an index of a sorted KISS file.
+ # Given a KISS index and a begin position,
+ # locate the offset closest to the begin position,
+ # and return this.
- my ( $file, # KISS file
+ my ( $index, # KISS index
+ $beg, # begin position
) = @_;
- # Returns a hashref.
+ # Returns a number.
- my ( $tree, $offset, $fh, $line, $beg );
+ my ( $bucket );
- $tree = {};
- $offset = 0;
+ Maasha::Common::error( qq(Negative begin position: "$beg") ) if $beg < 0;
- $fh = Maasha::Filesys::file_read_open( $file );
+ $bucket = int( $beg / BUCKET_SIZE );
- while ( $line = <$fh> )
+ $bucket = scalar @{ $index } if $bucket > scalar @{ $index };
+
+ while ( $bucket >= 0 )
{
- ( undef, $beg ) = split "\t", $line, 3;
+ return $index->[ $bucket ]->[ OFFSET ] if defined $index->[ $bucket ];
- kiss_index_node_add( $tree, INDEX_LEVEL, INDEX_FACTOR, $beg, $offset );
-
- $offset += length $line;
+ $bucket--;
}
-
- close $fh;
-
- kiss_index_store( "$file.index", $tree );
}
-sub kiss_index_node_add
+sub kiss_index_count
{
- # Martin A, Hansen, November 2009.
+ # Martin A. Hansen, December 2009.
- # Recursive routine to add nodes to a tree.
+ # Given a KISS index and a begin/end interval
+ # sum the number of counts in that interval,
+ # and return this.
- my ( $node,
- $level,
- $factor,
- $beg,
- $offset,
- $sum,
+ my ( $index, # KISS index
+ $beg, # Begin position
+ $end, # End position
) = @_;
-
- my ( $bucket );
-
- $sum ||= 0;
- $bucket = int( $beg / $level );
-
- if ( $level >= $factor )
- {
- $sum += $bucket * $level;
- $beg -= $bucket * $level;
-
- $node->{ 'CHILDREN' }->[ $bucket ]->{ 'COUNT' }++;
- # $node->{ 'CHILDREN' }->[ $bucket ]->{ 'LEVEL' } = $level;
- # $node->{ 'CHILDREN' }->[ $bucket ]->{ 'BUCKET' } = $bucket;
- $node->{ 'CHILDREN' }->[ $bucket ]->{ 'BEG' } = $sum;
- $node->{ 'CHILDREN' }->[ $bucket ]->{ 'END' } = $sum + $level - 1;
- $node->{ 'CHILDREN' }->[ $bucket ]->{ 'OFFSET' } = $offset if not defined $node->{ 'CHILDREN' }->[ $bucket ]->{ 'OFFSET' };
-
- kiss_index_node_add( $node->{ 'CHILDREN' }->[ $bucket ], $level / $factor, $factor, $beg, $offset, $sum );
- }
-}
+ # Returns a number.
-sub kiss_index_offset
-{
- # Martin A. Hansen, November 2009.
-
- # Given a KISS index and a begin position,
- # locate the offset closest to the begin position,
- # and return this.
-
- my ( $index, # KISS index
- $beg, # begin position
- $level, # index level - OPTIONAL
- $factor, # index factor - OPTIONAL
- ) = @_;
+ my ( $bucket_beg, $bucket_end, $count, $i );
- # Returns a number.
+ Maasha::Common::error( qq(Negative begin position: "$beg") ) if $beg < 0;
- my ( $child, $offset );
+ $bucket_beg = int( $beg / BUCKET_SIZE );
+ $bucket_end = int( $end / BUCKET_SIZE );
- $level ||= INDEX_LEVEL;
- $factor ||= INDEX_FACTOR;
+ $bucket_end = scalar @{ $index } if $bucket_end > scalar @{ $index };
- foreach $child ( @{ $index->{ 'CHILDREN' } } )
- {
- next if not defined $child;
+ $count = 0;
- if ( $child->{ 'BEG' } <= $beg and $beg <= $child->{ 'END' } )
- {
- if ( $level == $factor ) {
- $offset = $child->{ 'OFFSET' };
- } else {
- $offset = kiss_index_offset( $child, $beg, $level / $factor, $factor );
- }
- }
+ for ( $i = $bucket_beg; $i <= $bucket_end; $i++ ) {
+ $count += $index->[ $i ]->[ COUNT ] if defined $index->[ $i ];
}
- # Maasha::Common::error( "No offset" ) if not defined $offset; # FIXME
-
- return $offset;
+ return $count;
}
-sub kiss_index_count
+sub kiss_index_count_nc
{
- # Martin A. Hansen, November 2009.
-
+ # Martin A. Hansen, December 2009.
+
# Given a KISS index and a begin/end interval
# sum the number of counts in that interval,
# and return this.
my ( $index, # KISS index
$beg, # Begin position
$end, # End position
- $level, # index level - OPTIONAL
- $factor, # index factor - OPTIONAL
) = @_;
# Returns a number.
-
- my ( $count, $child );
- $level ||= INDEX_LEVEL;
- $factor ||= INDEX_FACTOR;
- $count ||= 0;
+ my ( $count );
- foreach $child ( @{ $index->{ 'CHILDREN' } } )
- {
- next if not defined $child;
+ Maasha::Common::error( qq(Negative begin position: "$beg") ) if $beg < 0;
- if ( $level >= $factor )
- {
- if ( Maasha::Calc::overlap( $beg, $end, $child->{ 'BEG' }, $child->{ 'END' } ) )
- {
- $count += $child->{ 'COUNT' } if $level == $factor;
- $count += kiss_index_count( $child, $beg, $end, $level / $factor, $factor );
- }
- }
- }
+ $count = Maasha::NClist::nc_list_count_interval( $index, $beg, $end, INDEX_BEG, INDEX_END, INDEX );
return $count;
}
-sub kiss_index_store
+sub kiss_index_get_entries
{
# Martin A. Hansen, November 2009.
- # Stores a KISS index to a file.
+ # Given a path to a KISS file and a KISS index
+ # along with a beg/end interval, locate all entries
+ # in that interval and return those.
+ # The index based seek is currently commented out, so the
+ # file is scanned from the top and $index is not consulted.
- my ( $path, # path to KISS index
+ my ( $file, # path to KISS file
$index, # KISS index
+ $beg, # interval begin
+ $end, # interval end
) = @_;
- # Returns nothing.
-
- Maasha::Filesys::file_store( $path, $index );
-}
-
+ # Returns a list in list context, else a listref.
-sub kiss_index_retrieve
-{
- # Martin A. Hansen, November 2009.
+ my ( $offset, $fh, $entry, @entries );
- # Retrieves a KISS index from a file.
+ # $offset = kiss_index_offset( $index, $beg );
- my ( $path, # Path to KISS index
- ) = @_;
+ $fh = Maasha::Filesys::file_read_open( $file );
- # Returns a data structure.
+ # sysseek( $fh, $offset, 0 );
- my ( $index );
+ while ( $entry = Maasha::KISS::kiss_entry_get( $fh ) )
+ {
+ # Test the stop condition before collecting: an entry that
+ # begins after $end is outside the interval and must not be
+ # returned (assumes the KISS file is sorted on S_BEG).
+ last if $entry->[ S_BEG ] > $end;
+
+ push @entries, $entry if $entry->[ S_END ] > $beg;
+ }
- $index = Maasha::Filesys::file_retrieve( $path );
+ close $fh;
- return wantarray ? @{ $index } : $index;
+ return wantarray ? @entries : \@entries;
}
-sub kiss_index_get_entries
+sub kiss_index_get_entries_OLD
{
# Martin A. Hansen, November 2009.
sysseek( $fh, $offset, 0 );
- while ( $entry = kiss_entry_get( $fh ) )
+ while ( $entry = Maasha::KISS::kiss_entry_get( $fh ) )
{
- push @entries, $entry if $entry->{ 'S_END' } > $beg;
-
- last if $entry->{ 'S_BEG' } > $end;
+ push @entries, $entry if $entry->[ S_END ] > $beg;
+
+ last if $entry->[ S_BEG ] > $end;
}
close $fh;
sub kiss_index_get_blocks
{
- # Martin A. Hansen, November 2009.
+ # Martin A. Hansen, December 2009.
- # Given a KISS index recursively traverse
- # this into the appropriate node size determined
- # by the size of the given beg/end interval.
- # Blocks consisting of hashes of BEG/END/COUNT
- # are returned from the requested node size.
+ # Given a KISS index, returns the blocks from this that lie
+ # in a given position interval. Each block is a hash with
+ # the keys BEG/END/COUNT.
my ( $index, # KISS index node
$beg, # interval begin
$end, # interval end
- $level, # index level - OPTIONAL
- $factor, # index factor - OPTIONAL
- $size, # requested node size
) = @_;
# Returns a list.
-
- my ( $len, @blocks, $child );
-
- $level ||= INDEX_LEVEL;
- $factor ||= INDEX_FACTOR;
-
- $size ||= 100; # TODO: lazy list loading?
-
-# if ( not defined $size )
-# {
-# $len = $end - $beg + 1;
-#
-# if ( $len > 100_000_000 ) {
-# $size = 1_000_000;
-# } elsif ( $len > 1_000_000 ) {
-# $size = 10_000;
-# } else {
-# $size = 100;
-# }
-# }
-
- if ( $level >= $size )
+
+ my ( $bucket_beg, $bucket_end, $i, @blocks );
+
+ Maasha::Common::error( qq(Negative begin position: "$beg") ) if $beg < 0;
+
+ $bucket_beg = int( $beg / BUCKET_SIZE );
+ $bucket_end = int( $end / BUCKET_SIZE );
+
+ $bucket_end = scalar @{ $index } if $bucket_end > scalar @{ $index };
+
+ for ( $i = $bucket_beg; $i <= $bucket_end; $i++ )
{
- foreach $child ( @{ $index->{ 'CHILDREN' } } )
+ if ( defined $index->[ $i ] )
{
- next if not defined $child;
+ push @blocks, {
+ BEG => BUCKET_SIZE * $i,
+ END => BUCKET_SIZE * ( $i + 1 ),
+ COUNT => $index->[ $i ]->[ COUNT ],
+ };
+ }
+ }
+
+ return wantarray ? @blocks : \@blocks;
+}
+
+
+sub kiss_intersect
+{
+ # Martin A. Hansen, December 2009.
+
+ # Given filehandles to two different unsorted KISS files
+ # intersect the entries so that all entries from file1 that
+ # overlap entries in file2 are returned - unless the inverse flag
+ # is given in which case entries from file1 that do not
+ # overlap any entries in file2 are returned.
+
+ my ( $fh1, # filehandle to file1
+ $fh2, # filehandle to file2
+ $inverse, # flag indicating inverse matching - OPTIONAL
+ ) = @_;
+
+ # Returns a list
+
+ my ( $entry, %lookup, $pos, $overlap, @entries );
+
+ while ( $entry = kiss_entry_get( $fh2 ) ) {
+ map { $lookup{ $_ } = 1 } ( $entry->[ S_BEG ] .. $entry->[ S_END ] );
+ }
+
+ while ( $entry = kiss_entry_get( $fh1 ) )
+ {
+ $overlap = 0;
- if ( Maasha::Calc::overlap( $beg, $end, $child->{ 'BEG' }, $child->{ 'END' } ) )
+ foreach $pos ( $entry->[ S_BEG ] .. $entry->[ S_END ] )
+ {
+ if ( exists $lookup{ $pos } )
{
- if ( $level == $size )
- {
- push @blocks, {
- BEG => $child->{ 'BEG' },
- END => $child->{ 'END' },
- COUNT => $child->{ 'COUNT' },
- };
- }
-
- push @blocks, kiss_index_get_blocks( $child, $beg, $end, $level / $factor, $factor, $size );
+ $overlap = 1;
+
+ last;
}
}
+
+ if ( $overlap and not $inverse ) {
+ push @entries, $entry;
+ } elsif ( not $overlap and $inverse ) {
+ push @entries, $entry;
+ }
}
- return wantarray ? @blocks : \@blocks;
+ return wantarray ? @entries : \@entries;
+}
+
+
+sub kiss_index_store
+{
+    # Martin A. Hansen, November 2009.
+
+    # Stores a KISS index to a file as JSON text.
+    # Raises an error if the index cannot be written completely.
+
+    my ( $path,    # path to KISS index
+         $index,   # KISS index
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $fh, $json );
+
+    # kiss_index passes an undefined index when the KISS file holds
+    # no entries; store an empty list so the file always contains a
+    # JSON array.
+    $index = [] if not defined $index;
+
+    $json = JSON::XS::encode_json( $index );
+
+    $fh = Maasha::Filesys::file_write_open( $path );
+
+    print $fh $json or Maasha::Common::error( qq(Could not write KISS index "$path": $!) );
+
+    # Buffered write errors only surface when the handle is closed.
+    close $fh or Maasha::Common::error( qq(Could not close KISS index "$path": $!) );
+}
+
+
+sub kiss_index_retrieve
+{
+    # Martin A. Hansen, November 2009.
+
+    # Reads the JSON text of a stored KISS index from a file
+    # and decodes it back into a data structure.
+
+    my ( $path,   # Path to KISS index
+       ) = @_;
+
+    # Returns the decoded list in list context, else the reference.
+
+    # Undefined record separator: the read below slurps the whole file.
+    local $/;
+
+    my $index_fh = Maasha::Filesys::file_read_open( $path );
+
+    my $json_text = <$index_fh>;
+
+    close $index_fh;
+
+    my $decoded = JSON::XS::decode_json( $json_text );
+
+    return $decoded unless wantarray;
+    return @{ $decoded };
}
# Maasha::Common::error( "Sequence lengths don't match" ) if length ${ $s_seq } != length ${ $q_seq };
- if ( length ${ $s_seq } != length ${ $q_seq } ) { # for unknown reasons this situation may occur - TODO
+ if ( length ${ $s_seq } != length ${ $q_seq } ) { # for unknown reasons this situation may occur - TODO FIXME
return wantarray ? () : [];
}
# Martin A. Hansen, November 2009.
# Converts a KISS entry to a Biopiece record.
- # TODO: Consistency checking
my ( $entry, # KISS entry
) = @_;
# Returns a hashref
- return wantarray ? %{ $entry } : $entry;
+ my ( %record );
+
+ Maasha::Common::error( qq(BAD kiss entry) ) if not scalar @{ $entry } == 12;
+
+ $record{ 'S_ID' } = $entry->[ S_ID ];
+ $record{ 'S_BEG' } = $entry->[ S_BEG ];
+ $record{ 'S_END' } = $entry->[ S_END ];
+ $record{ 'Q_ID' } = $entry->[ Q_ID ];
+ $record{ 'SCORE' } = $entry->[ SCORE ];
+ $record{ 'STRAND' } = $entry->[ STRAND ];
+ $record{ 'HITS' } = $entry->[ HITS ];
+ $record{ 'ALIGN' } = $entry->[ ALIGN ];
+ $record{ 'BLOCK_COUNT' } = $entry->[ BLOCK_COUNT ];
+ $record{ 'BLOCK_BEGS' } = $entry->[ BLOCK_BEGS ];
+ $record{ 'BLOCK_LENS' } = $entry->[ BLOCK_LENS ];
+ $record{ 'BLOCK_TYPE' } = $entry->[ BLOCK_TYPE ];
+
+ return wantarray ? %record : \%record;
}
# Returns a list in list context, else a listref (undef if S_ID, S_BEG and S_END are all missing).
- $record->{ 'SCORE' } ||= $record->{ 'E_VAL' } || ".";
- $record->{ 'HITS' } ||= ".";
- $record->{ 'BLOCK_COUNT' } ||= ".";
- $record->{ 'BLOCK_BEGS' } ||= ".";
- $record->{ 'BLOCK_LENS' } ||= ".";
- $record->{ 'BLOCK_TYPE' } ||= ".";
- $record->{ 'ALIGN' } ||= $record->{ 'DESCRIPTOR' } || ".";
+ my ( $entry );
+
+ if ( not defined $record->{ 'S_ID' } and
+ not defined $record->{ 'S_BEG' } and
+ not defined $record->{ 'S_END' } )
+ {
+ return undef;
+ }
- return wantarray ? %{ $record } : $record;
+ $entry->[ S_ID ] = $record->{ 'S_ID' };
+ $entry->[ S_BEG ] = $record->{ 'S_BEG' };
+ $entry->[ S_END ] = $record->{ 'S_END' };
+ $entry->[ Q_ID ] = $record->{ 'Q_ID' } || ".";
+ $entry->[ SCORE ] = $record->{ 'SCORE' } || $record->{ 'BIT_SCORE' } || $record->{ 'ID' } || ".";
+ $entry->[ STRAND ] = $record->{ 'STRAND' } || ".";
+ $entry->[ HITS ] = $record->{ 'HITS' } || ".";
+ $entry->[ ALIGN ] = $record->{ 'ALIGN' } || $record->{ 'DESCRIPTOR' } || ".";
+ $entry->[ BLOCK_COUNT ] = $record->{ 'BLOCK_COUNT' } || ".";
+ $entry->[ BLOCK_BEGS ] = $record->{ 'BLOCK_BEGS' } || $record->{ 'Q_BEGS' } || ".";
+ $entry->[ BLOCK_LENS ] = $record->{ 'BLOCK_LENS' } || ".";
+ $entry->[ BLOCK_TYPE ] = $record->{ 'BLOCK_TYPE' } || ".";
+
+ return wantarray ? @{ $entry } : $entry;
}
1;
+__END__
+
+sub kiss_index_nc
+{
+    # Martin A. Hansen, February 2010.
+
+    # Builds a NC list from the tab separated lines of a sorted
+    # KISS file and stores it next to the file as "<file>.json".
+
+    my ( $file,   # path to KISS file
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $kiss_fh, $nc_list );
+
+    $kiss_fh = Maasha::Filesys::file_read_open( $file );
+
+    while ( defined( my $row = <$kiss_fh> ) )
+    {
+        chomp $row;
+
+        my $fields = [ split "\t", $row ];
+
+        if ( defined $nc_list ) {
+            Maasha::NClist::nc_list_add( $nc_list, $fields, INDEX_END, INDEX );
+        } else {
+            # The first entry seeds the list.
+            $nc_list = [ $fields ];
+        }
+    }
+
+    close $kiss_fh;
+
+    Maasha::NClist::nc_list_store( $nc_list, "$file.json" );
+}
+
+
+sub kiss_index_get_entries_nc
+{
+    # Martin A. Hansen, November 2009.
+
+    # Given a KISS index (NC list) along with a beg/end interval,
+    # fetches the entries of that interval through
+    # Maasha::NClist::nc_list_get_interval and returns those.
+
+    my ( $index,   # KISS index
+         $beg,     # interval begin
+         $end,     # interval end
+       ) = @_;
+
+    # Returns a list in list context, else a listref.
+
+    my $hits = Maasha::NClist::nc_list_get_interval( $index, $beg, $end, INDEX_BEG, INDEX_END, INDEX );
+
+    return $hits unless wantarray;
+    return @{ $hits };
+}
+
+
+sub kiss_index_retrieve_nc
+{
+    # Martin A. Hansen, November 2009.
+
+    # Loads a stored NC list KISS index from a file through
+    # Maasha::NClist::nc_list_retrieve.
+
+    my ( $path,   # Path to KISS index
+       ) = @_;
+
+    # Returns the index as a list in list context, else the reference.
+
+    my $nc_list = Maasha::NClist::nc_list_retrieve( $path );
+
+    return $nc_list unless wantarray;
+    return @{ $nc_list };
+}