#!/usr/bin/env ruby
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2013 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
require 'maasha/biopieces'
require 'maasha/seq'
+require 'pp'
+
+class Seq; include Homopolymer; end
casts = []
-casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"}
+casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"}
+casts << {:long=>'limit', :short=>'l', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"}
+casts << {:long=>'longest', :short=>'L', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
if record[:SEQ]
seq = Seq.new(nil, record[:SEQ])
- record[:HOMOPOL_MAX] = seq.homopol_max(options[:min])
+  # Report homopolymers found in this record's sequence.
+  #
+  # Default mode: emit one copy of the record per homopolymer hit (at most
+  # options[:limit] copies when a limit is given).
+  # --longest mode: emit the record once, annotated with the longest hit
+  # (the first one encountered wins ties).
+  # A record without any hit is passed through unchanged in both modes.
+  longest = nil
+  got_one = false
+  count = 0
+
+  seq.each_homopolymer(options[:min]) do |h|
+    got_one = true
+
+    if options[:longest]
+      # Only remember the candidate here; the record is annotated once after
+      # the scan so that intermediate hits never leak into the output.
+      longest = h if longest.nil? || h.length > longest.length
+    else
+      record[:HOMOPOL_PAT] = h.pattern
+      record[:HOMOPOL_LEN] = h.length
+      record[:HOMOPOL_POS] = h.pos
+
+      output.puts record
+
+      count += 1
+
+      break if options[:limit] && options[:limit] == count
+    end
+  end
+
+  # Every yielded hit is a valid candidate, so a longest hit of any length is
+  # written back.
+  # NOTE(review): assumes each_homopolymer applies the options[:min] filter
+  # itself - confirm in maasha/seq/homopolymer.rb.
+  if longest
+    record[:HOMOPOL_PAT] = longest.pattern
+    record[:HOMOPOL_LEN] = longest.length
+    record[:HOMOPOL_POS] = longest.pos
+  end
+
+  # In --longest mode the record is emitted exactly once; otherwise it is only
+  # emitted here when no hit was emitted inside the loop.
+  output.puts record if options[:longest] || !got_one
+ else
+ output.puts record
end
-
- output.puts record
end
end
require 'maasha/seq/trim'
require 'narray'
-autoload :BackTrack, 'maasha/seq/backtrack.rb'
-autoload :Dynamic, 'maasha/seq/dynamic.rb'
+autoload :BackTrack, 'maasha/seq/backtrack.rb'
+autoload :Dynamic, 'maasha/seq/dynamic.rb'
+autoload :Homopolymer, 'maasha/seq/homopolymer.rb'
# Residue alphabets
DNA = %w[a t c g]
comp
end
- # Method that returns the length of the longest homopolymeric stretch
- # found in a sequence.
- def homopol_max(min = 1)
- return 0 if self.seq.nil? or self.seq.empty?
-
- found = false
-
- self.seq.upcase.scan(/A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|N{#{min},}/) do |match|
- found = true
- min = match.size > min ? match.size : min
- end
-
- return 0 unless found
-
- min
- end
-
# Method that returns the percentage of hard masked residues
# or N's in a sequence.
def hard_mask
assert_equal(0, @entry.composition["X"])
end
- test "#homopol_max returns 0 with empty sequence" do
- @entry.seq = ""
- assert_equal(0, @entry.homopol_max)
- end
-
- test "#homopol_max returns 0 with nil sequence" do
- @entry.seq = nil
- assert_equal(0, @entry.homopol_max)
- end
-
- test "#homopol_max returns 0 when not found" do
- @entry.seq = "AtTcCcGggGnnNnn"
- assert_equal(0, @entry.homopol_max(6))
- end
-
- test "#homopol_max returns correctly" do
- @entry.seq = "AtTcCcGggGnnNnn"
- assert_equal(5, @entry.homopol_max(3))
- end
-
test "#hard_mask returns correctly" do
@entry.seq = "--AAAANn"
assert_equal(33.33, @entry.hard_mask)