From: martinahansen Date: Thu, 16 Dec 2010 13:07:14 +0000 (+0000) Subject: added min option to find_homopolymers X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=88d5613e01a4034cc8586c753fa33d5db745b91e;p=biopieces.git added min option to find_homopolymers git-svn-id: http://biopieces.googlecode.com/svn/trunk@1192 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/find_homopolymers b/bp_bin/find_homopolymers index 097d651..ba9d639 100755 --- a/bp_bin/find_homopolymers +++ b/bp_bin/find_homopolymers @@ -34,6 +34,7 @@ require 'seq' require 'pp' casts = [] +casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} bp = Biopieces.new @@ -43,7 +44,7 @@ bp.each_record do |record| if record.has_key? :SEQ seq = Seq.new(nil, record[:SEQ]) - record[:HOMOPOL_MAX] = seq.homopol_max + record[:HOMOPOL_MAX] = seq.homopol_max(options[:min]) end bp.puts record diff --git a/bp_test/out/find_homopolymers.out.2 b/bp_test/out/find_homopolymers.out.2 new file mode 100644 index 0000000..9213941 --- /dev/null +++ b/bp_test/out/find_homopolymers.out.2 @@ -0,0 +1,20 @@ +SEQ_NAME: test1 +SEQ: attcccggggnnnnn +SEQ_LEN: 15 +HOMOPOL_MAX: 5 +--- +SEQ_NAME: test2 +SEQ: nnnnnggggccctta +SEQ_LEN: 15 +HOMOPOL_MAX: 5 +--- +SEQ_NAME: test3 +SEQ: a +SEQ_LEN: 1 +HOMOPOL_MAX: 0 +--- +SEQ_NAME: test4 +SEQ: aA +SEQ_LEN: 2 +HOMOPOL_MAX: 0 +--- diff --git a/bp_test/test/test_find_homopolymers b/bp_test/test/test_find_homopolymers index 61c5709..939c6f4 100755 --- a/bp_test/test/test_find_homopolymers +++ b/bp_test/test/test_find_homopolymers @@ -5,3 +5,7 @@ source "$BP_DIR/bp_test/lib/test.sh" run "$bp -I $in -O $tmp" assert_no_diff $tmp $out.1 clean + +run "$bp -I $in -m 3 -O $tmp" +assert_no_diff $tmp $out.2 +clean diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index 696e070..04dfdeb 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -164,14 +164,19 @@ class Seq # Method that returns the length of the longest homopolymeric stretch # found in a sequence. - def homopol_max(max = 1) + def homopol_max(min = 1) return 0 if self.seq.nil? or self.seq.empty? - self.seq.upcase.scan(/A{#{max},}|T{#{max},}|G{#{max},}|C{#{max},}|N{#{max},}/) do |match| - max = match.size > max ? match.size : max + found = false + + self.seq.upcase.scan(/A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|N{#{min},}/) do |match| + found = true + min = match.size > min ? match.size : min end - max + return 0 unless found + + min end # Method that returns the percentage of hard masked residues diff --git a/code_ruby/Maasha/test/test_seq.rb b/code_ruby/Maasha/test/test_seq.rb index a5a2732..6e10288 100755 --- a/code_ruby/Maasha/test/test_seq.rb +++ b/code_ruby/Maasha/test/test_seq.rb @@ -201,9 +201,14 @@ class TestSeq < Test::Unit::TestCase assert_equal(0, @entry.homopol_max) end + def test_Seq_homopol_max_returns_0_when_not_found + @entry.seq = "AtTcCcGggGnnNnn" + assert_equal(0, @entry.homopol_max(6)) + end + def test_Seq_homopol_max_returns_correctly @entry.seq = "AtTcCcGggGnnNnn" - assert_equal(5, @entry.homopol_max) + assert_equal(5, @entry.homopol_max(3)) end def test_Seq_hard_mask_returns_correctly