@ISA = qw( Exporter );
+use constant {
+ S_ID => 0,
+ S_BEG => 1,
+ S_END => 2,
+ Q_ID => 3,
+ SCORE => 4,
+ STRAND => 5,
+ HITS => 6,
+ ALIGN => 7,
+ BLOCK_COUNT => 8,
+ BLOCK_BEGS => 9,
+ BLOCK_LENS => 10,
+ BLOCK_TYPE => 11,
+};
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$start = $cookie->{ 'NAV_START' };
$end = $cookie->{ 'NAV_END' };
- $index = Maasha::KISS::kiss_index_retrieve( "$track/track_data.kiss.index" );
+ $index = Maasha::KISS::kiss_index_retrieve( "$track/track_data.kiss.json" );
$count = Maasha::KISS::kiss_index_count( $index, $start, $end );
$track_name = ( split "/", $track )[ -1 ];
if ( $count > $cookie->{ 'FEAT_MAX' } )
{
- $entries = Maasha::KISS::kiss_index_get_blocks( $index, $start, $end );
- push @{ $features }, track_feature_histogram( $cookie, $start, $end, $entries );
+# $entries = Maasha::KISS::kiss_index_get_blocks( $index, $start, $end );
+# push @{ $features }, track_feature_histogram( $cookie, $start, $end, $entries );
}
else
{
- $entries = Maasha::KISS::kiss_index_get_entries( "$track/track_data.kiss", $index, $start, $end );
+ $entries = Maasha::KISS::kiss_index_get_entries( $index, $start, $end );
push @{ $features }, track_feature_linear( $cookie, $start, $end, $entries );
}
my ( $factor, $entry, $y_step, @ladder, $y_max, $w, $x1, $y1, $x2, $y2, @features );
- @{ $entries } = sort { $a->{ 'S_BEG' } <=> $b->{ 'S_BEG' } or $b->{ 'S_END' } <=> $a->{ 'S_END' } } @{ $entries };
+ # @{ $entries } = sort { $a->[ S_BEG ] <=> $b->[ S_BEG ] or $b->[ S_END ] <=> $a->[ S_END ] } @{ $entries };
$factor = $cookie->{ 'IMG_WIDTH' } / ( $end - $beg + 1 );
$y_step = 0;
foreach $entry ( @{ $entries } )
{
- $w = sprintf( "%.0f", ( $entry->{ 'S_END' } - $entry->{ 'S_BEG' } + 1 ) * $factor );
+ $w = sprintf( "%.0f", ( $entry->[ S_END ] - $entry->[ S_BEG ] + 1 ) * $factor );
if ( $w >= 1 )
{
- $x1 = sprintf( "%.0f", ( $entry->{ 'S_BEG' } - $beg ) * $factor );
+ $x1 = sprintf( "%.0f", ( $entry->[ S_BEG ] - $beg ) * $factor );
for ( $y_step = 0; $y_step < @ladder; $y_step++ ) {
last if $x1 >= $ladder[ $y_step ] + 1;
type => 'rect',
line_width => $cookie->{ 'FEAT_WIDTH' },
color => $cookie->{ 'FEAT_COLOR' },
- title => "Q_ID: $entry->{ 'Q_ID' } S_BEG: $entry->{ 'S_BEG' } S_END: $entry->{ 'S_END' } STRAND: $entry->{ 'STRAND' }",
- q_id => $entry->{ 'Q_ID' },
- s_beg => $entry->{ 'S_BEG' },
- s_end => $entry->{ 'S_END' },
- strand => $entry->{ 'STRAND' },
+ title => "Q_ID: $entry->[ Q_ID ] S_BEG: $entry->[ S_BEG ] S_END: $entry->[ S_END ] STRAND: $entry->[ STRAND ]",
+ q_id => $entry->[ Q_ID ],
+ s_beg => $entry->[ S_BEG ],
+ s_end => $entry->[ S_END ],
+ strand => $entry->[ STRAND ],
x1 => $x1,
y1 => $y1,
x2 => $x1 + $w,
$y_max = Maasha::Calc::max( $y_max, $y_step * ( 0.1 + $cookie->{ 'FEAT_WIDTH' } ) );
- push @features, feature_align( $entry, $beg, $y1, $factor, $cookie->{ 'FEAT_WIDTH' } ) if $entry->{ 'ALIGN' } ne '.';
+ push @features, feature_align( $entry, $beg, $y1, $factor, $cookie->{ 'FEAT_WIDTH' } ) if $entry->[ ALIGN ] ne '.';
$ladder[ $y_step ] = $x1 + $w;
}
Maasha::Common::error( qq(BAD align descriptor: "$align") );
}
- $x1 = sprintf( "%.0f", ( $entry->{ 'S_BEG' } + $pos - $beg ) * $factor );
+ $x1 = sprintf( "%.0f", ( $entry->[ S_BEG ] + $pos - $beg ) * $factor );
push @features, {
type => 'rect',
# Returns a list.
- my ( $search_track, $search_term, $contig, @tracks, $track, $file, $line, $out_file, $fh, $entry, @entries, $track_name );
+ my ( $search_track, $search_term, $contig, @tracks, $track, $file, $features, $track_name, $nc_list );
if ( $cookie->{ 'SEARCH' } =~ /^(.+)\s+track:\s*(.+)/i )
{
next if $track_name !~ /$search_track/;
}
- $file = "$track/track_data.kiss";
+ $file = "$track/track_data.kiss.json";
if ( -f $file )
{
- $fh = Maasha::Filesys::file_read_open( $file );
-
- while ( $line = <$fh> )
- {
- chomp $line;
-
- if ( $line =~ /$search_term/i )
- {
- $entry = Maasha::KISS::kiss_entry_parse( $line );
-
- push @entries, $entry;
- }
- }
-
- close $fh;
+ $nc_list = Maasha::NClist::nc_list_retrieve( $file );
+ $features = Maasha::NClist::nc_list_search( $nc_list, $search_term, 12 );
}
}
- return wantarray ? @entries : \@entries;
+ return wantarray ? @{ $features } : $features;
}
use Data::Dumper;
use Maasha::Common;
use Maasha::Filesys;
+use Maasha::NClist;
use Maasha::Align;
+
use vars qw( @ISA @EXPORT );
@ISA = qw( Exporter );
BUCKET_SIZE => 100,
COUNT => 0,
OFFSET => 1,
+
+ INDEX_BEG => 1,
+ INDEX_END => 2,
+ INDEX => 12,
};
# Returns a hashref.
- my ( $line, @fields, %entry );
+ my ( $line, @fields );
while ( $line = <$fh> )
{
Maasha::Common::error( qq(BAD kiss entry: $line) ) if not @fields == 12;
- $entry{ 'S_ID' } = $fields[ S_ID ];
- $entry{ 'S_BEG' } = $fields[ S_BEG ];
- $entry{ 'S_END' } = $fields[ S_END ];
- $entry{ 'Q_ID' } = $fields[ Q_ID ];
- $entry{ 'SCORE' } = $fields[ SCORE ];
- $entry{ 'STRAND' } = $fields[ STRAND ];
- $entry{ 'HITS' } = $fields[ HITS ];
- $entry{ 'ALIGN' } = $fields[ ALIGN ];
- $entry{ 'BLOCK_COUNT' } = $fields[ BLOCK_COUNT ];
- $entry{ 'BLOCK_BEGS' } = $fields[ BLOCK_BEGS ];
- $entry{ 'BLOCK_LENS' } = $fields[ BLOCK_LENS ];
- $entry{ 'BLOCK_TYPE' } = $fields[ BLOCK_TYPE ];
-
- return wantarray ? %entry : \%entry;
+ return wantarray ? @fields : \@fields;
}
}
{
# Martin A. Hansen, December 2009.
+ # TODO find out what uses this and kill it!
+
# Parses a line with a KISS entry.
my ( $line, # KISS line to parse
# Returns nothing.
- my ( @fields );
+ Maasha::Common::error( qq(BAD kiss entry) ) if not scalar @{ $entry } == 12;
- if ( defined $entry->{ 'S_ID' } and
- defined $entry->{ 'S_BEG' } and
- defined $entry->{ 'S_END' }
+ if ( defined $entry->[ S_ID ] and
+ defined $entry->[ S_BEG ] and
+ defined $entry->[ S_END ]
)
{
- Maasha::Common::error( qq(Bad S_BEG value: $entry->{ 'S_BEG' } < 0 ) ) if $entry->{ 'S_BEG' } < 0;
- Maasha::Common::error( qq(Bad S_END value: $entry->{ 'S_END' } < $entry->{ 'S_BEG' }) ) if $entry->{ 'S_END' } < $entry->{ 'S_BEG' };
+ Maasha::Common::error( qq(Bad S_BEG value: $entry->[ S_BEG ] < 0 ) ) if $entry->[ S_BEG ] < 0;
+ Maasha::Common::error( qq(Bad S_END value: $entry->[ S_END ] < $entry->[ S_BEG ] ) ) if $entry->[ S_END ] < $entry->[ S_BEG ];
$fh ||= \*STDOUT;
- $fields[ S_ID ] = $entry->{ 'S_ID' };
- $fields[ S_BEG ] = $entry->{ 'S_BEG' };
- $fields[ S_END ] = $entry->{ 'S_END' };
- $fields[ Q_ID ] = $entry->{ 'Q_ID' } || ".";
- $fields[ SCORE ] = $entry->{ 'SCORE' } || ".";
- $fields[ STRAND ] = $entry->{ 'STRAND' } || ".";
- $fields[ HITS ] = $entry->{ 'HITS' } || ".";
- $fields[ ALIGN ] = $entry->{ 'ALIGN' } || ".";
- $fields[ BLOCK_COUNT ] = $entry->{ 'BLOCK_COUNT' } || ".";
- $fields[ BLOCK_BEGS ] = $entry->{ 'BLOCK_BEGS' } || ".";
- $fields[ BLOCK_LENS ] = $entry->{ 'BLOCK_LENS' } || ".";
- $fields[ BLOCK_TYPE ] = $entry->{ 'BLOCK_TYPE' } || ".";
-
- print $fh join( "\t", @fields ), "\n";
+ $entry->[ Q_ID ] = "." if not defined $entry->[ Q_ID ];
+ $entry->[ SCORE ] = "." if not defined $entry->[ SCORE ];
+ $entry->[ STRAND ] = "." if not defined $entry->[ STRAND ];
+ $entry->[ HITS ] = "." if not defined $entry->[ HITS ];
+ $entry->[ ALIGN ] = "." if not defined $entry->[ ALIGN ];
+ $entry->[ BLOCK_COUNT ] = "." if not defined $entry->[ BLOCK_COUNT ];
+ $entry->[ BLOCK_BEGS ] = "." if not defined $entry->[ BLOCK_BEGS ];
+ $entry->[ BLOCK_LENS ] = "." if not defined $entry->[ BLOCK_LENS ];
+ $entry->[ BLOCK_TYPE ] = "." if not defined $entry->[ BLOCK_TYPE ];
+
+ print $fh join( "\t", @{ $entry } ), "\n";
}
}
}
-sub kiss_index
+sub kiss_index_old
{
# Martin A. Hansen, December 2009.
}
+sub kiss_index
+{
+ # Martin A. Hansen, February 2010.
+
+ # Creates a NC list index of a sorted KISS file.
+
+ my ( $file, # path to KISS file
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $fh, $line, @fields, $nc_list );
+
+ $fh = Maasha::Filesys::file_read_open( $file );
+
+ while ( $line = <$fh> )
+ {
+ chomp $line;
+
+ @fields = split "\t", $line;
+
+ if ( not defined $nc_list ) {
+ $nc_list = [ [ @fields ] ];
+ } else {
+ Maasha::NClist::nc_list_add( $nc_list, [ @fields ], INDEX_END, INDEX );
+ }
+ }
+
+ close $fh;
+
+ Maasha::NClist::nc_list_store( $nc_list, "$file.json" );
+}
+
+
sub kiss_index_offset
{
# Martin A. Hansen, December 2009.
# Returns a number.
- my ( $bucket_beg, $bucket_end, $count, $i );
+ my ( $count );
Maasha::Common::error( qq(Negative begin position: "$beg") ) if $beg < 0;
- $bucket_beg = int( $beg / BUCKET_SIZE );
- $bucket_end = int( $end / BUCKET_SIZE );
-
- $bucket_end = scalar @{ $index } if $bucket_end > scalar @{ $index };
-
- $count = 0;
-
- for ( $i = $bucket_beg; $i <= $bucket_end; $i++ ) {
- $count += $index->[ $i ]->[ COUNT ] if defined $index->[ $i ];
- }
+ $count = Maasha::NClist::nc_list_count_interval( $index, $beg, $end, INDEX_BEG, INDEX_END, INDEX );
return $count;
}
# along with a beg/end interval, locate all entries
# in that interval and return those.
- my ( $file, # path to KISS file
- $index, # KISS index
+ my ( $index, # KISS index
$beg, # interval begin
$end, # interval end
) = @_;
# Returns a list.
- my ( $offset, $fh, $entry, @entries );
-
- $offset = kiss_index_offset( $index, $beg );
-
- $fh = Maasha::Filesys::file_read_open( $file );
-
- sysseek( $fh, $offset, 0 );
-
- while ( $entry = Maasha::KISS::kiss_entry_get( $fh ) )
- {
- push @entries, $entry if $entry->{ 'S_END' } > $beg;
+ my ( $features );
- last if $entry->{ 'S_BEG' } > $end;
- }
-
- close $fh;
+ $features = Maasha::NClist::nc_list_get_interval( $index, $beg, $end, INDEX_BEG, INDEX_END, INDEX );
- return wantarray ? @entries : \@entries;
+ return wantarray ? @{ $features } : $features;
}
my ( $entry, %lookup, $pos, $overlap, @entries );
while ( $entry = kiss_entry_get( $fh2 ) ) {
- map { $lookup{ $_ } = 1 } ( $entry->{ 'S_BEG' } .. $entry->{ 'S_END' } );
+ map { $lookup{ $_ } = 1 } ( $entry->[ S_BEG ] .. $entry->[ S_END ] );
}
while ( $entry = kiss_entry_get( $fh1 ) )
{
$overlap = 0;
- foreach $pos ( $entry->{ 'S_BEG' } .. $entry->{ 'S_END' } )
+ foreach $pos ( $entry->[ S_BEG ] .. $entry->[ S_END ] )
{
if ( exists $lookup{ $pos } )
{
my ( $index );
- $index = Maasha::Filesys::file_retrieve( $path );
+ $index = Maasha::NClist::nc_list_retrieve( $path );
return wantarray ? @{ $index } : $index;
}
# Martin A. Hansen, November 2009.
# Converts a KISS entry to a Biopiece record.
- # TODO: Consistency checking
my ( $entry, # KISS entry
) = @_;
# Returns a hashref
- return wantarray ? %{ $entry } : $entry;
+ my ( %record );
+
+ Maasha::Common::error( qq(BAD kiss entry) ) if not scalar @{ $entry } == 12;
+
+ $record{ 'S_ID' } = $entry->[ S_ID ];
+ $record{ 'S_BEG' } = $entry->[ S_BEG ];
+ $record{ 'S_END' } = $entry->[ S_END ];
+ $record{ 'Q_ID' } = $entry->[ Q_ID ];
+ $record{ 'SCORE' } = $entry->[ SCORE ];
+ $record{ 'STRAND' } = $entry->[ STRAND ];
+ $record{ 'HITS' } = $entry->[ HITS ];
+ $record{ 'ALIGN' } = $entry->[ ALIGN ];
+ $record{ 'BLOCK_COUNT' } = $entry->[ BLOCK_COUNT ];
+ $record{ 'BLOCK_BEGS' } = $entry->[ BLOCK_BEGS ];
+ $record{ 'BLOCK_LENS' } = $entry->[ BLOCK_LENS ];
+ $record{ 'BLOCK_TYPE' } = $entry->[ BLOCK_TYPE ];
+
+ return wantarray ? %record : \%record;
}
# Returns a hashref
+ my ( $entry );
+
if ( not defined $record->{ 'S_ID' } and
not defined $record->{ 'S_BEG' } and
not defined $record->{ 'S_END' } )
return undef;
}
- $record->{ 'SCORE' } ||= $record->{ 'E_VAL' } || ".";
- $record->{ 'HITS' } ||= ".";
- $record->{ 'BLOCK_COUNT' } ||= ".";
- $record->{ 'BLOCK_BEGS' } ||= ".";
- $record->{ 'BLOCK_LENS' } ||= ".";
- $record->{ 'BLOCK_TYPE' } ||= ".";
- $record->{ 'ALIGN' } ||= $record->{ 'DESCRIPTOR' } || ".";
-
- return wantarray ? %{ $record } : $record;
+ $entry->[ S_ID ] = $record->{ 'S_ID' };
+ $entry->[ S_BEG ] = $record->{ 'S_BEG' };
+ $entry->[ S_END ] = $record->{ 'S_END' };
+ $entry->[ Q_ID ] = $record->{ 'Q_ID' } || ".";
+ $entry->[ SCORE ] = $record->{ 'SCORE' } || $record->{ 'E_VAL' } || ".";
+ $entry->[ STRAND ] = $record->{ 'STRAND' } || ".";
+ $entry->[ HITS ] = $record->{ 'HITS' } || ".";
+ $entry->[ ALIGN ] = $record->{ 'ALIGN' } || $record->{ 'DESCRIPTOR' } || ".";
+ $entry->[ BLOCK_COUNT ] = $record->{ 'BLOCK_COUNT' } || ".";
+ $entry->[ BLOCK_BEGS ] = $record->{ 'BLOCK_BEGS' } || ".";
+ $entry->[ BLOCK_LENS ] = $record->{ 'BLOCK_LENS' } || ".";
+ $entry->[ BLOCK_TYPE ] = $record->{ 'BLOCK_TYPE' } || ".";
+
+ return wantarray ? @{ $entry } : $entry;
}
use strict;
use Maasha::Filesys;
-use Storable;
use Data::Dumper;
+use Time::HiRes;
use Json::XS;
use vars qw( @ISA @EXPORT );
{
# Martin A. Hansen, February 2010.
- # Recursively construct a Nested Containment (NC) list by added
+ # Recursively construct a Nested Containment (NC) list by adding
# a given feature to an existing NC list.
my ( $nc_list, # NC list
# Given a Nested Containment (NC) list use binary search to locate
# the NC list containing a given search position. The index of the NC
- # list containing the search position is returned.
+ # list containing the search position is returned. Extremely fast.
my ( $nc_list, # NC list
$pos, # search position
# Martin A. Hansen, February 2010.
# Recursively retrieve all features from a Nested Containment (NC) list
- # from a specified index begin to a specified index end. The index is
- # stripped.
+ # from a specified index begin to a specified index end.
+
+ # WARNING: The NC list is distroyed because the sublists are stripped.
my ( $nc_list, # NC list
$index_beg, # index begin
# Returns a list.
- my ( $i, $nc, @features );
+ my ( $i, @features );
for ( $i = $index_beg; $i <= $index_end; $i++ )
{
- $nc = Storable::dclone( $nc_list->[ $i ] );
-
- push @features, $nc;
+ push @features, $nc_list->[ $i ];
if ( defined $nc_list->[ $i ]->[ $index ] ) # sublist exists so recurse to this.
{
push @features, nc_list_get( $nc_list->[ $i ]->[ $index ], 0, scalar @{ $nc_list->[ $i ]->[ $index ] } - 1, $index );
- delete $nc->[ $index ];
+ delete $nc_list->[ $i ]->[ $index ];
}
}
}
+sub nc_list_search
+{
+ # Martin A. Hansen, February 2010.
+
+ # Recursively search a Nested Containment (NC) list for features matching
+ # a given REGEX.
+
+ my ( $nc_list, # NC list
+ $regex, # regex to search for
+ $index, # feature element with index position
+ ) = @_;
+
+ # Returns a list.
+
+ my ( $feature, @features );
+
+ foreach $feature ( @{ $nc_list } )
+ {
+ push @features, $feature if grep /$regex/i, @{ $feature };
+
+ if ( defined $feature->[ $index ] ) # sublist exists so recurse to this.
+ {
+ push @features, nc_list_search( $feature->[ $index ], $regex, $index );
+
+ delete $feature->[ $index ];
+ }
+ }
+
+ return wantarray ? @features : \@features;
+}
+
+
sub nc_list_store
{
+ # Martin A. Hansen, February 2010.
+
+ # Store a Nested Containment (NC) list to a
+ # given file.
+
my ( $nc_list, # NC list
$file, # path to file
) = @_;
}
-sub nc_list_retieve
+sub nc_list_retrieve
{
# Martin A. Hansen, February 2010.
1;
+
+__END__
+
+ my $t0 = Time::HiRes::gettimeofday();
+ my $t1 = Time::HiRes::gettimeofday(); print STDERR "Time: " . ( $t1 - $t0 ) . "\n";