git.donarmstrong.com Git - biopieces.git/commitdiff
added indels_remove method
author: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 11 Jan 2012 17:02:00 +0000 (17:02 +0000)
committer: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 11 Jan 2012 17:02:00 +0000 (17:02 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1723 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/seq.rb
code_ruby/test/maasha/test_seq.rb

index 562bab91848cab683cef488e35122cf813a03ad0..f4abff7c36aafba918a04b76d5e6f3544345225c 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
 
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -134,6 +134,28 @@ class Seq
     self.seq.scan(regex).size
   end
 
+  # Method to remove indels from seq, and from qual as well if qual is set.
+  def indels_remove
+    if self.qual.nil?
+      self.seq.delete!(Regexp.escape(INDELS.join('')))
+    else
+      na_seq  = NArray.to_na(self.seq, "byte")
+      na_qual = NArray.to_na(self.qual, "byte")
+      mask    = NArray.byte(self.length)
+
+      INDELS.each do |c|
+        mask += na_seq.eq(c.ord)
+      end
+
+      mask = mask.eq(0)
+
+      self.seq  = na_seq[mask].to_s
+      self.qual = na_qual[mask].to_s
+    end
+
+    self
+  end
+
   # Method that returns true if the sequence type is DNA.
   def is_dna?
     self.type == 'dna'
index 76652981e4349b1ea57e875cdd5c879e37e310d6..a4d47ad203d601a54a5012855c1ca3942976e186 100755 (executable)
@@ -458,6 +458,19 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("ghhhhhg", @entry.qual) 
   end
 
+  def test_Seq_indels_remove_without_qual_returns_correctly
+    @entry.seq  = "A-T.CG~CG"
+    @entry.qual = nil
+    assert_equal("ATCGCG", @entry.indels_remove.seq)
+  end
+
+  def test_Seq_indels_remove_with_qual_returns_correctly
+    @entry.seq  = "A-T.CG~CG"
+    @entry.qual = "a@b@cd@fg"
+    assert_equal("ATCGCG", @entry.indels_remove.seq)
+    assert_equal("abcdfg", @entry.indels_remove.qual)
+  end
+
   def test_Seq_composition_returns_correctly
     @entry.seq = "AAAATTTCCG"
     assert_equal(4, @entry.composition["A"])