git.donarmstrong.com Git - biopieces.git/commitdiff
added kiss_intersect
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 14 Dec 2009 08:28:45 +0000 (08:28 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 14 Dec 2009 08:28:45 +0000 (08:28 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@803 74ccb610-7750-0410-82ae-013aeee3265d

code_perl/Maasha/KISS.pm

index a2b3f152f6071b9992102bdb32be972c1f3ddd13..cd52a68450f9e54f8992eff930262abdabbbe72d 100644 (file)
@@ -197,6 +197,54 @@ sub kiss_sort
 }
 
 
+sub kiss_intersect
+{
+    # Martin A. Hansen, December 2009.
+
+    # Given filehandles to two different unsorted KISS files,
+    # intersect the entries so that all entries from file1 that
+    # overlap entries in file2 are returned - unless the inverse flag
+    # is given, in which case the entries from file1 that do not
+    # overlap any entries in file2 are returned. Overlap is decided
+    # from the inclusive S_BEG .. S_END ranges only.
+
+    my ( $fh1,       # filehandle to file1
+         $fh2,       # filehandle to file2
+         $inverse,   # flag indicating inverse matching - OPTIONAL
+       ) = @_;
+
+    # Returns a list, or a reference to that list in scalar context.
+
+    my ( %lookup, @entries );
+
+    # Mark every position covered by file2. A foreach over the range
+    # (not a void map) avoids building the range as a temporary list,
+    # but %lookup still grows with the number of covered positions.
+    while ( my $entry = kiss_entry_get( $fh2 ) ) {
+        $lookup{ $_ } = 1 foreach ( $entry->{ 'S_BEG' } .. $entry->{ 'S_END' } );
+    }
+
+    while ( my $entry = kiss_entry_get( $fh1 ) )
+    {
+        my $overlap = 0;
+
+        # Stop at the first shared position - one is enough.
+        foreach my $pos ( $entry->{ 'S_BEG' } .. $entry->{ 'S_END' } )
+        {
+            next if not exists $lookup{ $pos };
+
+            $overlap = 1;
+            last;
+        }
+
+        # Keep overlapping entries, or non-overlapping ones if inverted.
+        push @entries, $entry if $overlap xor $inverse;
+    }
+
+    return wantarray ? @entries : \@entries;
+}
+
+
 sub kiss_index
 {
     # Martin A. Hansen, November 2009.