]> git.donarmstrong.com Git - biopieces.git/commitdiff
added remove_illumina_adaptor Biopiece
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sun, 20 Feb 2011 13:06:22 +0000 (13:06 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sun, 20 Feb 2011 13:06:22 +0000 (13:06 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1277 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/remove_illumina_adaptor [new file with mode: 0755]
code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

diff --git a/bp_bin/remove_illumina_adaptor b/bp_bin/remove_illumina_adaptor
new file mode 100755 (executable)
index 0000000..ad52cc5
--- /dev/null
@@ -0,0 +1,75 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Remove Illumina adaptors or parts thereof from sequences in the stream.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+require 'seq'
+
+casts = []
+casts << {:long=>'right', :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'left',  :short=>'l', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+bp.each_record do |record|
+  if record.has_key? :SEQ
+    entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:TYPE], record[:SCORES])
+
+    if options[:right]
+      pos_right = entry.adaptor_locate_right(options[:right])
+      entry.subseq!(0, pos_right) if pos_right >= 0
+      record[:CLIP_ADAPTOR_RIGHT] = pos_right
+    else
+      record[:CLIP_ADAPTOR_RIGHT] = -1
+    end
+
+    if options[:left]
+      pos_left = entry.adaptor_locate_left(options[:left])
+      entry.subseq!(pos_left + 1) if pos_left >= 0
+      record[:CLIP_ADAPTOR_LEFT] = pos_left
+    else
+      record[:CLIP_ADAPTOR_LEFT] = -1
+    end
+    
+    record[:SEQ]     = entry.seq
+    record[:SEQ_LEN] = entry.len
+    record[:SCORES]  = entry.qual
+  end
+
+  bp.puts record
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
index de13ec21b034fe8a65074d8aaff8afbc17485121..19cd1b1e9fbf6dfce1ad553567803c4626a4bfde 100644 (file)
@@ -223,8 +223,8 @@ class Seq
 
   # Method that returns a subsequence of from a given start position
   # and of a given length.
-  def subseq(start, length)
-    raise SeqError, "sunsequence start: #{start} < 0"                                                if start  < 0
+  def subseq(start, length = self.length - start)
+    raise SeqError, "subsequence start: #{start} < 0"                                                if start  < 0
     raise SeqError, "subsequence length: #{length} < 1"                                              if length <= 0
     raise SeqError, "subsequence start + length > Seq.length: #{start} + #{length} > #{self.length}" if start + length > self.length
 
@@ -236,6 +236,19 @@ class Seq
     Seq.new(self.seq_name, seq, self.type, qual)
   end
 
+  # Method that replaces a sequence with a subsequence from a given start position
+  # and of a given length.
+  def subseq!(start, length = self.length - start)
+    raise SeqError, "subsequence start: #{start} < 0"                                                if start  < 0
+    raise SeqError, "subsequence length: #{length} < 1"                                              if length <= 0
+    raise SeqError, "subsequence start + length > Seq.length: #{start} + #{length} > #{self.length}" if start + length > self.length
+
+    stop = start + length - 1
+
+    self.seq  = self.seq[start .. stop]
+    self.qual = self.qual[start .. stop] unless self.qual.nil?
+  end
+
   # Method that returns a subsequence of a given length
   # beginning at a random position.
   def subseq_rand(length)
index 2482422611ca53fbdf9a5cb5e61b3848011706e7..9296e14c9eb57e23457a34228c2011854fb7e8e0 100755 (executable)
@@ -213,6 +213,12 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("CG", @entry.subseq(2, 2).seq)
   end
 
+  def test_Seq_subseq_without_len_returns_correct_sequence
+    @entry.seq  = "ATCG"
+    assert_equal("ATCG", @entry.subseq(0).seq)
+    assert_equal("CG",   @entry.subseq(2).seq)
+  end
+
   def test_Seq_subseq_returns_correct_qual
     @entry.seq  = "ATCG"
     @entry.qual = "abcd"
@@ -220,6 +226,68 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("cd", @entry.subseq(2, 2).qual)
   end
 
+  def test_Seq_subseq_without_len_returns_correct_qual
+    @entry.seq  = "ATCG"
+    @entry.qual = "abcd"
+    assert_equal("abcd", @entry.subseq(0).qual)
+    assert_equal("cd",   @entry.subseq(2).qual)
+  end
+
+  def test_Seq_subseq_bang_with_start_lt_0_raises
+    @entry.seq = "ATCG"
+    assert_raise(SeqError) { @entry.subseq!(-1, 1) }
+  end
+
+  def test_Seq_subseq_bang_with_length_lt_1_raises
+    @entry.seq = "ATCG"
+    assert_raise(SeqError) { @entry.subseq!(0, 0) }
+  end
+
+  def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
+    @entry.seq = "ATCG"
+    assert_raise(SeqError) { @entry.subseq!(0, 5) }
+  end
+
+  def test_Seq_subseq_bang_returns_correct_sequence
+    @entry.seq  = "ATCG"
+    @entry.subseq!(0, 2)
+    assert_equal("AT", @entry.seq)
+    @entry.seq  = "ATCG"
+    @entry.subseq!(2, 2)
+    assert_equal("CG", @entry.seq)
+  end
+
+  def test_Seq_subseq_bang_without_len_returns_correct_sequence
+    @entry.seq  = "ATCG"
+    @entry.subseq!(0)
+    assert_equal("ATCG", @entry.seq)
+    @entry.seq  = "ATCG"
+    @entry.subseq!(2)
+    assert_equal("CG", @entry.seq)
+  end
+
+  def test_Seq_subseq_bang_returns_correct_qual
+    @entry.seq  = "ATCG"
+    @entry.qual = "abcd"
+    @entry.subseq!(0, 2)
+    assert_equal("ab", @entry.qual)
+    @entry.seq  = "ATCG"
+    @entry.qual = "abcd"
+    @entry.subseq!(2, 2)
+    assert_equal("cd", @entry.qual)
+  end
+
+  def test_Seq_subseq_bang_returns_correct_qual
+    @entry.seq  = "ATCG"
+    @entry.qual = "abcd"
+    @entry.subseq!(0)
+    assert_equal("abcd", @entry.qual)
+    @entry.seq  = "ATCG"
+    @entry.qual = "abcd"
+    @entry.subseq!(2)
+    assert_equal("cd", @entry.qual)
+  end
+
   def test_Seq_subseq_rand_returns_correct_sequence
     @entry.seq  = "ATCG"
     assert_equal("ATCG", @entry.subseq_rand(4).seq)