From fdcdfb877bb2d652c7c82f7a411f73ff5c9282fe Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 3 Feb 2010 12:36:54 +0000 Subject: [PATCH] added NClist.pm git-svn-id: http://biopieces.googlecode.com/svn/trunk@856 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/NClist.pm | 325 +++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 code_perl/Maasha/NClist.pm diff --git a/code_perl/Maasha/NClist.pm b/code_perl/Maasha/NClist.pm new file mode 100644 index 0000000..eda1d4a --- /dev/null +++ b/code_perl/Maasha/NClist.pm @@ -0,0 +1,325 @@ +package Maasha::NClist; + +# Copyright (C) 2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +# Nested Containment List (NCList): A new algorithm for accelerating +# interval query of genome alignment and interval databases +# http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btl647v1 + +# Nested lists are composed of intervals defined by begin and end positions, +# and any interval that is contained within another interval is rooted to this. +# Thus, fast interval lookups can be performed using binary search. + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; + +use Maasha::Filesys; +use Storable; +use Data::Dumper; +use Json::XS; + +use vars qw( @ISA @EXPORT ); + +@ISA = qw( Exporter ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub nc_list_create +{ + # Martin A. Hansen, February 2010. + + # Creates a Nested Containment (NC) list from a stack of features. + # The features consits of an AoA where beg and end specifies the + # elements containing the begin and end position of a feature, and + # index specifies a element used for nesting lists. + + my ( $features, # list of features AoA + $beg, # feature element with begin position + $end, # feature element with end position + $index, # feature element with index position + ) = @_; + + # Returns a list. + + my ( $nc_list ); + + @{ $features } = sort { $a->[ $beg ] <=> $b->[ $beg ] or $b->[ $end ] <=> $a->[ $end ] } @{ $features }; + + $nc_list = [ shift @{ $features } ]; + + map { nc_list_add( $nc_list, $_, $end, $index ) } @{ $features }; + + return wantarray ? @{ $nc_list } : $nc_list; +} + + +sub nc_list_add +{ + # Martin A. Hansen, February 2010. + + # Recursively construct a Nested Containment (NC) list by added + # a given feature to an existing NC list. + + my ( $nc_list, # NC list + $feat, # feature (AoA) + $end, # feature element with end position + $index # feature element with index position + ) = @_; + + # Returns nothing. + + if ( $feat->[ $end ] <= $nc_list->[ -1 ]->[ $end ] ) # feature is nested. + { + if ( defined $nc_list->[ -1 ]->[ $index ] ) { # sublist exists so recurse to this. + nc_list_add( $nc_list->[ -1 ]->[ $index ], $feat, $end, $index ); + } else { + $nc_list->[ -1 ]->[ $index ] = [ $feat ]; # creating a new sublist. + } + } + else + { + push @{ $nc_list }, $feat; + } +} + + +sub nc_list_lookup +{ + # Martin A. Hansen, February 2010. + + # Given a Nested Containment (NC) list use binary search to locate + # the NC list containing a given search position. The index of the NC + # list containing the search position is returned. + + my ( $nc_list, # NC list + $pos, # search position + $beg, # feature element with begin position + $end, # feature element with end position + $index, # feature element with index position + ) = @_; + + # Returns an integer. + + my ( $low, $high, $try ); + + $low = 0; + $high = scalar @{ $nc_list }; + + while ( $low < $high ) + { + $try = int( ( $high + $low ) / 2 ); + + if ( $pos < $nc_list->[ $try ]->[ $beg ] ) { + $high = $try; + } elsif ( $nc_list->[ $try ]->[ $end ] < $pos ) { + $low = $try + 1; + } else { + last; + } + } + + return $try; +} + + +sub nc_list_count +{ + # Martin A. Hansen, February 2010. + + # Traverses a Nested Containment (NC) list recursively from a + # given index begin to a given index end and counts all + # features. The count is returned. + + my ( $nc_list, # NC list + $index_beg, # index begin + $index_end, # index end + $index, # feature element with index position + ) = @_; + + # Returns an integer. + + my ( $i, $count ); + + for ( $i = $index_beg; $i <= $index_end; $i++ ) + { + $count++; + + if ( defined $nc_list->[ $i ]->[ $index ] ) { # sublist exists so recurse to this. + $count += nc_list_count( $nc_list->[ $i ]->[ $index ], 0, scalar @{ $nc_list->[ $i ]->[ $index ] } - 1, $index ); + } + } + + return $count; +} + + +sub nc_list_count_interval +{ + # Martin A. Hansen, February 2010. + + # Counts all features in a Nested Containment (NC) list within a + # specified interval. The count is returned. + + my ( $nc_list, # NC list + $int_beg, # interval begin + $int_end, # interval end + $beg, # feature element with begin position + $end, # feature element with end position + $index, # feature element with index position + ) = @_; + + # Returns an integer. + + my ( $index_beg, $index_end, $count ); + + $index_beg = nc_list_lookup( $nc_list, $int_beg, $beg, $end, $index ); + $index_end = nc_list_lookup( $nc_list, $int_end, $beg, $end, $index ); + + $count = nc_list_count( $nc_list, $index_beg, $index_end, $index ); + + return $count; +} + + +sub nc_list_get +{ + # Martin A. Hansen, February 2010. + + # Recursively retrieve all features from a Nested Containment (NC) list + # from a specified index begin to a specified index end. The index is + # stripped. + + my ( $nc_list, # NC list + $index_beg, # index begin + $index_end, # index end + $index, # feature element with index position + ) = @_; + + # Returns a list. + + my ( $i, $nc, @features ); + + for ( $i = $index_beg; $i <= $index_end; $i++ ) + { + $nc = Storable::dclone( $nc_list->[ $i ] ); + + push @features, $nc; + + if ( defined $nc_list->[ $i ]->[ $index ] ) # sublist exists so recurse to this. + { + push @features, nc_list_get( $nc_list->[ $i ]->[ $index ], 0, scalar @{ $nc_list->[ $i ]->[ $index ] } - 1, $index ); + + delete $nc->[ $index ]; + } + } + + return wantarray ? @features : \@features; +} + + +sub nc_list_get_interval +{ + # Martin A. Hansen, February 2010. + + # Retrieve all features from a Nested Containment (NC) list from within + # a specified interval. + + my ( $nc_list, # NC list + $int_beg, # interval begin + $int_end, # interval end + $beg, # feature element with begin position + $end, # feature element with end position + $index, # feature element with index position + ) = @_; + + # Returns a list. + + my ( $index_beg, $index_end, $features ); + + $index_beg = nc_list_lookup( $nc_list, $int_beg, $beg, $end, $index ); + $index_end = nc_list_lookup( $nc_list, $int_end, $beg, $end, $index ); + + $features = nc_list_get( $nc_list, $index_beg, $index_end, $index ); + + return wantarray ? @{ $features } : $features; +} + + +sub nc_list_store +{ + my ( $nc_list, # NC list + $file, # path to file + ) = @_; + + # Returns nothing. + + my ( $fh, $json ); + + $json = JSON::XS::encode_json( $nc_list ); + + $fh = Maasha::Filesys::file_write_open( $file ); + + print $fh $json; + + close $fh; +} + + +sub nc_list_retieve +{ + # Martin A. Hansen, February 2010. + + # Retrieves a Nested Containment (NC) list from a + # JSON file. + + my ( $file, # path to JSON file + ) = @_; + + # Returns NC list. + + my ( $fh, $json, $nc_list ); + + local $/ = undef; + + $fh = Maasha::Filesys::file_read_open( $file ); + + $json = <$fh>; + + close $fh; + + $nc_list = JSON::XS::decode_json( $json ); + + return wantarray ? @{ $nc_list } : $nc_list; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +1; -- 2.39.2