--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Remove Illumina adaptors or parts thereof from sequences in the stream.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+require 'seq'
+
+casts = []
+casts << {:long=>'right', :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'left', :short=>'l', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+bp.each_record do |record|
+ if record.has_key? :SEQ
+ entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:TYPE], record[:SCORES])
+
+ if options[:right]
+ pos_right = entry.adaptor_locate_right(options[:right])
+ entry.subseq!(0, pos_right) if pos_right >= 0
+ record[:CLIP_ADAPTOR_RIGHT] = pos_right
+ else
+ record[:CLIP_ADAPTOR_RIGHT] = -1
+ end
+
+ if options[:left]
+ pos_left = entry.adaptor_locate_left(options[:left])
+ entry.subseq!(pos_left + 1) if pos_left >= 0
+ record[:CLIP_ADAPTOR_LEFT] = pos_left
+ else
+ record[:CLIP_ADAPTOR_LEFT] = -1
+ end
+
+ record[:SEQ] = entry.seq
+ record[:SEQ_LEN] = entry.len
+ record[:SCORES] = entry.qual
+ end
+
+ bp.puts record
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
# Method that returns a subsequence of from a given start position
# and of a given length.
- def subseq(start, length)
- raise SeqError, "sunsequence start: #{start} < 0" if start < 0
+ def subseq(start, length = self.length - start)
+ raise SeqError, "subsequence start: #{start} < 0" if start < 0
raise SeqError, "subsequence length: #{length} < 1" if length <= 0
raise SeqError, "subsequence start + length > Seq.length: #{start} + #{length} > #{self.length}" if start + length > self.length
Seq.new(self.seq_name, seq, self.type, qual)
end
+ # Method that replaces a sequence with a subsequence from a given start position
+ # and of a given length.
+ def subseq!(start, length = self.length - start)
+ raise SeqError, "subsequence start: #{start} < 0" if start < 0
+ raise SeqError, "subsequence length: #{length} < 1" if length <= 0
+ raise SeqError, "subsequence start + length > Seq.length: #{start} + #{length} > #{self.length}" if start + length > self.length
+
+ stop = start + length - 1
+
+ self.seq = self.seq[start .. stop]
+ self.qual = self.qual[start .. stop] unless self.qual.nil?
+ end
+
# Method that returns a subsequence of a given length
# beginning at a random position.
def subseq_rand(length)
assert_equal("CG", @entry.subseq(2, 2).seq)
end
+ def test_Seq_subseq_without_len_returns_correct_sequence
+ @entry.seq = "ATCG"
+ assert_equal("ATCG", @entry.subseq(0).seq)
+ assert_equal("CG", @entry.subseq(2).seq)
+ end
+
def test_Seq_subseq_returns_correct_qual
@entry.seq = "ATCG"
@entry.qual = "abcd"
assert_equal("cd", @entry.subseq(2, 2).qual)
end
+ def test_Seq_subseq_without_len_returns_correct_qual
+ @entry.seq = "ATCG"
+ @entry.qual = "abcd"
+ assert_equal("abcd", @entry.subseq(0).qual)
+ assert_equal("cd", @entry.subseq(2).qual)
+ end
+
+ def test_Seq_subseq_bang_with_start_lt_0_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.subseq!(-1, 1) }
+ end
+
+ def test_Seq_subseq_bang_with_length_lt_1_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.subseq!(0, 0) }
+ end
+
+ def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.subseq!(0, 5) }
+ end
+
+ def test_Seq_subseq_bang_returns_correct_sequence
+ @entry.seq = "ATCG"
+ @entry.subseq!(0, 2)
+ assert_equal("AT", @entry.seq)
+ @entry.seq = "ATCG"
+ @entry.subseq!(2, 2)
+ assert_equal("CG", @entry.seq)
+ end
+
+ def test_Seq_subseq_bang_without_len_returns_correct_sequence
+ @entry.seq = "ATCG"
+ @entry.subseq!(0)
+ assert_equal("ATCG", @entry.seq)
+ @entry.seq = "ATCG"
+ @entry.subseq!(2)
+ assert_equal("CG", @entry.seq)
+ end
+
+ def test_Seq_subseq_bang_returns_correct_qual
+ @entry.seq = "ATCG"
+ @entry.qual = "abcd"
+ @entry.subseq!(0, 2)
+ assert_equal("ab", @entry.qual)
+ @entry.seq = "ATCG"
+ @entry.qual = "abcd"
+ @entry.subseq!(2, 2)
+ assert_equal("cd", @entry.qual)
+ end
+
+ def test_Seq_subseq_bang_returns_correct_qual
+ @entry.seq = "ATCG"
+ @entry.qual = "abcd"
+ @entry.subseq!(0)
+ assert_equal("abcd", @entry.qual)
+ @entry.seq = "ATCG"
+ @entry.qual = "abcd"
+ @entry.subseq!(2)
+ assert_equal("cd", @entry.qual)
+ end
+
def test_Seq_subseq_rand_returns_correct_sequence
@entry.seq = "ATCG"
assert_equal("ATCG", @entry.subseq_rand(4).seq)