]> git.donarmstrong.com Git - biopieces.git/commitdiff
refactoring of revcomp in seq.rb
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 28 Feb 2013 12:36:22 +0000 (12:36 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 28 Feb 2013 12:36:22 +0000 (12:36 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2106 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/find_adaptor
bp_bin/pcr_seq
bp_bin/shred_seq
code_ruby/lib/maasha/locator.rb
code_ruby/lib/maasha/seq.rb
code_ruby/test/maasha/test_seq.rb

index 4fd559dd281b45b06b372134e6215cc600ef55a2..ac3681a090c8e80c6139a4d1640ce4dd97664430 100755 (executable)
@@ -53,11 +53,11 @@ casts << {:long=>'deletions',   :short=>'d', :type=>'uint',   :mandatory=>false,
 options = Biopieces.options_parse(ARGV, casts)
 
 if options[:forward_rc]
-  options[:forward] = Seq.new("test", options[:forward_rc], 'dna').revcomp.seq
+  options[:forward] = Seq.new("test", options[:forward_rc], 'dna').reverse.complement.seq
 end
 
 if options[:reverse_rc]
-  options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').revcomp.seq
+  options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').reverse.complement.seq
 end
 
 raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse]
index 5cee30b152e49d2d0a6c7013c6d05952b80bc57a..57eb447b3684d51f7346bb6a111c2c386937b776 100755 (executable)
@@ -146,7 +146,7 @@ class Pattern
     seq      = Seq.new
     seq.seq  = primer
     seq.type = 'dna'
-    seq.revcomp
+    seq.reverse!.complement!
 
     descriptor ? seq.seq + descriptor : seq.seq
   end
@@ -164,11 +164,11 @@ tmpdir  = Biopieces.mktmpdir
 infile  = File.join(tmpdir, "in.fna")
 
 if options[:forward_rc]
-  options[:forward] = Seq.new("test", options[:forward_rc], 'dna').revcomp.seq
+  options[:forward] = Seq.new("test", options[:forward_rc], 'dna').reverse.complement.seq
 end
 
 if options[:reverse_rc]
-  options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').revcomp.seq
+  options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').reverse.complement.seq
 end
 
 raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse]
index 5b2a886af36e0708ebf4066795e0396067eae86b..465f08855bec36eb769bf319e34b5c5ff25d0d61 100755 (executable)
@@ -51,7 +51,7 @@ class Seq
 
     while coverage(sum) < max_cov
       entry = self.subseq_rand(size)
-      entry.revcomp if strand == '-'
+      entry.reverse!.complement! if strand == '-'
 
       if block_given?
         yield entry
index 59bcdb17ce52fcd09bac83f0e537eca692de5a06..5996663b0a4cf1c6137bbfd89e1a0d362572f0c5 100644 (file)
@@ -110,7 +110,7 @@ class Locator
           newseq = Seq.new(nil, @seq.seq[int_beg..int_end], "dna")
 
                                        unless newseq.seq.nil?
-                                               newseq.revcomp if comp
+                                               newseq.reverse!.complement! if comp
 
                                                @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
                                        end
@@ -120,7 +120,7 @@ class Locator
           newseq = Seq.new(nil, @seq.seq[pos], "dna")
 
                                        unless newseq.seq.nil?
-               newseq.revcomp if comp 
+               newseq.reverse!.complement! if comp 
 
                @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
                                        end
index e72250d596cc5d1dd55adac34d3a15b3503048fa..4a0af47ed0f5fde9d5ffe87ca19a3caec087f6d6 100644 (file)
@@ -150,6 +150,7 @@ class Seq
   # by inspecting the first 100 residues.
   def type_guess!
     self.type = self.type_guess
+    self
   end
 
   # Returns the length of a sequence.
@@ -334,17 +335,13 @@ class Seq
     key
   end
 
-  # Method to reverse complement sequence.
-  def reverse_complement
-    self.reverse
-    self.complement
-    self
+  # Method to reverse the sequence.
+  def reverse
+    Seq.new(self.seq_name, self.seq.reverse, self.type, self.qual ? self.qual.reverse : self.qual)
   end
 
-  alias :revcomp :reverse_complement
-
   # Method to reverse the sequence.
-  def reverse
+  def reverse!
     self.seq.reverse!
     self.qual.reverse! if self.qual
     self
@@ -354,6 +351,26 @@ class Seq
   def complement
     raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
 
+    entry          = Seq.new
+    entry.seq_name = self.seq_name
+    entry.type     = self.type
+    entry.qual     = self.qual
+
+    if self.is_dna?
+      entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
+    elsif self.is_rna?
+      entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn')
+    else
+      raise SeqError, "Cannot complement sequence type: #{self.type}"
+    end
+
+    entry
+  end
+
+  # Method that complements sequence including ambiguity codes.
+  def complement!
+    raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
+
     if self.is_dna?
       self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
     elsif self.is_rna?
@@ -361,6 +378,8 @@ class Seq
     else
       raise SeqError, "Cannot complement sequence type: #{self.type}"
     end
+
+    self
   end
 
   # Method to determine the Hamming Distance between
@@ -488,21 +507,6 @@ class Seq
     ((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2)
   end
 
-  # Hard masks sequence residues where the corresponding quality score
-  # is below a given cutoff.
-  def mask_seq_hard_old(cutoff)
-    seq    = self.seq.upcase
-    scores = self.qual
-    i      = 0
-
-    scores.each_char do |score|
-      seq[i] = 'N' if score.ord - SCORE_BASE < cutoff
-      i += 1 
-    end
-
-    self.seq = seq
-  end
-
   # Hard masks sequence residues where the corresponding quality score
   # is below a given cutoff.
   def mask_seq_hard!(cutoff)
index f966a5fa47b2387c74f1c2a785b6f8e49082f1be..8bc6852927235b939414d80a02329b3b5490def7 100755 (executable)
@@ -219,7 +219,15 @@ class TestSeq < Test::Unit::TestCase
 
   def test_Seq_reverse_returns_correctly
     @entry.seq = "ATCG"
-    assert_equal("GCTA", @entry.reverse.seq)
+    new_entry  = @entry.reverse
+    assert_equal("GCTA", new_entry.seq)
+    assert_equal("ATCG", @entry.seq)
+  end
+
+  def test_Seq_reverse_bang_returns_correctly
+    @entry.seq = "ATCG"
+    @entry.reverse!
+    assert_equal("GCTA", @entry.seq)
   end
 
   def test_Seq_complement_raises_if_no_sequence
@@ -236,27 +244,43 @@ class TestSeq < Test::Unit::TestCase
   def test_Seq_complement_for_DNA_is_correct
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
-    assert_equal("TAGCtagc", @entry.complement)
+    comp        = @entry.complement
+    assert_equal("TAGCtagc", comp.seq)
+    assert_equal("ATCGatcg", @entry.seq)
   end
 
   def test_Seq_complement_for_RNA_is_correct
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
-    assert_equal("UAGCuagc", @entry.complement)
+    comp        = @entry.complement
+    assert_equal("UAGCuagc", comp.seq)
+    assert_equal("AUCGaucg", @entry.seq)
+  end
+
+  def test_Seq_complement_bang_raises_if_no_sequence
+    @entry.type = 'dna'
+    assert_raise(SeqError) { @entry.complement! }
+  end
+
+  def test_Seq_complement_bang_raises_on_bad_type
+    @entry.seq  = 'ATCG'
+    @entry.type = 'protein'
+    assert_raise(SeqError) { @entry.complement! }
   end
 
-  def test_Seq_reverse_complement_for_DNA_is_correct
+  def test_Seq_complement_bang_for_DNA_is_correct
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
-    assert_equal("cgatCGAT", @entry.reverse_complement.seq)
+    assert_equal("TAGCtagc", @entry.complement!.seq)
   end
 
-  def test_Seq_reverse_complement_for_RNA_is_correct
+  def test_Seq_complement_bang_for_RNA_is_correct
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
-    assert_equal("cgauCGAU", @entry.reverse_complement.seq)
+    assert_equal("UAGCuagc", @entry.complement!.seq)
   end
 
+
   def test_Seq_hamming_distance_returns_correctly
     seq1 = Seq.new("test1", "ATCG")
     seq2 = Seq.new("test2", "atgg")