require 'pp'
# Diff excerpt: lines prefixed with "+" are additions, "-" are removals.
casts = []
# Added option with long name 'min' and short name 'm': an unsigned integer,
# not mandatory, defaulting to 1, with the value "0" disallowed.
+casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"}
bp = Biopieces.new
# Only records carrying a :SEQ key get a :HOMOPOL_MAX value.
# NOTE(review): `record` comes from an enclosing loop that is not visible here.
if record.has_key? :SEQ
seq = Seq.new(nil, record[:SEQ])
- record[:HOMOPOL_MAX] = seq.homopol_max
# The minimum stretch length is now passed through to homopol_max.
# NOTE(review): `options` is defined outside this excerpt - presumably the
# parsed form of `casts`; confirm.
+ record[:HOMOPOL_MAX] = seq.homopol_max(options[:min])
end
bp.puts record
--- /dev/null
+SEQ_NAME: test1
+SEQ: attcccggggnnnnn
+SEQ_LEN: 15
+HOMOPOL_MAX: 5
+---
+SEQ_NAME: test2
+SEQ: nnnnnggggccctta
+SEQ_LEN: 15
+HOMOPOL_MAX: 5
+---
+SEQ_NAME: test3
+SEQ: a
+SEQ_LEN: 1
+HOMOPOL_MAX: 0
+---
+SEQ_NAME: test4
+SEQ: aA
+SEQ_LEN: 2
+HOMOPOL_MAX: 0
+---
# Case 1: no -m option given; the output in $tmp is compared against $out.1.
# NOTE(review): run, assert_no_diff and clean are helpers defined outside this excerpt.
run "$bp -I $in -O $tmp"
assert_no_diff $tmp $out.1
clean
+
# Case 2 (added): same input with -m 3; the output is compared against $out.2.
+run "$bp -I $in -m 3 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
# Method that returns the length of the longest homopolymeric stretch
# found in a sequence. Only stretches of at least `min` identical
# residues (A, T, G, C or N, matched against the upcased sequence) are
# considered. Returns 0 if the sequence is nil or empty, or if no
# stretch of at least `min` residues is found.
- def homopol_max(max = 1)
+ def homopol_max(min = 1)
return 0 if self.seq.nil? or self.seq.empty?
- self.seq.upcase.scan(/A{#{max},}|T{#{max},}|G{#{max},}|C{#{max},}|N{#{max},}/) do |match|
- max = match.size > max ? match.size : max
+ found = false
+
+ self.seq.upcase.scan(/A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|N{#{min},}/) do |match|
+ found = true
# `min` doubles as the running maximum from here on. The pattern above was
# interpolated from its original value before scanning started, so
# reassigning it does not change which stretches are matched.
+ min = match.size > min ? match.size : min
end
- max
+ return 0 unless found
+
+ min
end
# Method that returns the percentage of hard masked residues
assert_equal(0, @entry.homopol_max)
end
# The longest stretch in this sequence is five residues (nnNnn), so asking
# for a minimum of 6 matches nothing and 0 is expected.
+ def test_Seq_homopol_max_returns_0_when_not_found
+ @entry.seq = "AtTcCcGggGnnNnn"
+ assert_equal(0, @entry.homopol_max(6))
+ end
+
# Mixed-case input: with a minimum of 3, the longest qualifying stretch
# (nnNnn, five residues) is the expected result.
def test_Seq_homopol_max_returns_correctly
@entry.seq = "AtTcCcGggGnnNnn"
- assert_equal(5, @entry.homopol_max)
+ assert_equal(5, @entry.homopol_max(3))
end
def test_Seq_hard_mask_returns_correctly