git.donarmstrong.com Git - biopieces.git/commitdiff
added min option to find_homopolymers
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 16 Dec 2010 13:07:14 +0000 (13:07 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 16 Dec 2010 13:07:14 +0000 (13:07 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1192 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/find_homopolymers
bp_test/out/find_homopolymers.out.2 [new file with mode: 0644]
bp_test/test/test_find_homopolymers
code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

index 097d651b6dcd2866c4ed749bb10898ddd4dfa267..ba9d63975906d61f2f37e0ed6edd08992f43ef14 100755 (executable)
@@ -34,6 +34,7 @@ require 'seq'
 require 'pp'
 
 casts = []
+casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"}
 
 bp = Biopieces.new
 
@@ -43,7 +44,7 @@ bp.each_record do |record|
   if record.has_key? :SEQ
     seq = Seq.new(nil, record[:SEQ])
 
-    record[:HOMOPOL_MAX] = seq.homopol_max
+    record[:HOMOPOL_MAX] = seq.homopol_max(options[:min])
   end
 
   bp.puts record
diff --git a/bp_test/out/find_homopolymers.out.2 b/bp_test/out/find_homopolymers.out.2
new file mode 100644 (file)
index 0000000..9213941
--- /dev/null
@@ -0,0 +1,20 @@
+SEQ_NAME: test1
+SEQ: attcccggggnnnnn
+SEQ_LEN: 15
+HOMOPOL_MAX: 5
+---
+SEQ_NAME: test2
+SEQ: nnnnnggggccctta
+SEQ_LEN: 15
+HOMOPOL_MAX: 5
+---
+SEQ_NAME: test3
+SEQ: a
+SEQ_LEN: 1
+HOMOPOL_MAX: 0
+---
+SEQ_NAME: test4
+SEQ: aA
+SEQ_LEN: 2
+HOMOPOL_MAX: 0
+---
index 61c570925c960531a3e4b24eb994deff0d49ef25..939c6f4b961d02b94e82a9371ce4ceed6fb0f85a 100755 (executable)
@@ -5,3 +5,7 @@ source "$BP_DIR/bp_test/lib/test.sh"
 run "$bp -I $in -O $tmp"
 assert_no_diff $tmp $out.1
 clean
+
+run "$bp -I $in -m 3 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
index 696e07080801280b2f970d42bddd9d1faaa8d1d9..04dfdeb4ce6b02e8bd1522ed5bf60fa0e27bf5d5 100644 (file)
@@ -164,14 +164,19 @@ class Seq
 
   # Method that returns the length of the longest homopolymeric stretch
   # found in a sequence.
-  def homopol_max(max = 1)
+  def homopol_max(min = 1)
     return 0 if self.seq.nil? or self.seq.empty?
 
-    self.seq.upcase.scan(/A{#{max},}|T{#{max},}|G{#{max},}|C{#{max},}|N{#{max},}/) do |match|
-      max = match.size > max ? match.size : max
+    found = false
+
+    self.seq.upcase.scan(/A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|N{#{min},}/) do |match|
+      found = true
+      min   = match.size > min ? match.size : min
     end
 
-    max
+    return 0 unless found
+    min
   end
 
   # Method that returns the percentage of hard masked residues
index a5a27321bf08debb4dcdeaf672dd32cac0679c3e..6e102881a377391cf13c1e9109bf42ea4915bcb1 100755 (executable)
@@ -201,9 +201,14 @@ class TestSeq < Test::Unit::TestCase
     assert_equal(0, @entry.homopol_max)
   end
 
+  def test_Seq_homopol_max_returns_0_when_not_found
+    @entry.seq = "AtTcCcGggGnnNnn"
+    assert_equal(0, @entry.homopol_max(6))
+  end
+
   def test_Seq_homopol_max_returns_correctly
     @entry.seq = "AtTcCcGggGnnNnn"
-    assert_equal(5, @entry.homopol_max)
+    assert_equal(5, @entry.homopol_max(3))
   end
 
   def test_Seq_hard_mask_returns_correctly