git.donarmstrong.com Git - biopieces.git/commitdiff
added adaptor_locate and adaptor_clip methods to ruby code
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sat, 19 Feb 2011 14:51:05 +0000 (14:51 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sat, 19 Feb 2011 14:51:05 +0000 (14:51 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1273 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

index a8b4a84f1dc896675ad51026607f461b43fc7406..72ca1cd62c742b65bcb0787bb3ddb6cba3cde83f 100644 (file)
@@ -201,7 +201,7 @@ class Seq
   end
 
   # Method that generates a random sequence of a given length and type.
-  def generate(length,type)
+  def generate(length, type)
     raise SeqError, "Cannot generate sequence length < 1: #{length}" if length <= 0
 
     case type.downcase
@@ -290,6 +290,67 @@ class Seq
     ((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2)
   end
 
+  # Method that locates an adaptor or part thereof in the sequence
+  # of a Seq object beginning from the right. Returns the location
+  # in the sequence that overlaps with the adaptor or -1 if the
+  # adaptor was not found.
+  def adaptor_locate_right(adaptor)
+    pos = self.seq.length - adaptor.length
+
+    while pos < self.seq.length
+      len        = self.seq.length - pos
+      subseq     = self.seq[pos ... pos + len].upcase
+      subadaptor = adaptor[0 ... len].upcase
+
+      return pos if subseq == subadaptor
+      pos += 1
+    end
+
+    -1
+  end
+
+  # Method that locates an adaptor or part thereof in the sequence
+  # of a Seq object beginning from the left. Returns the location
+  # in the sequence that overlaps with the adaptor or -1 if the
+  # adaptor was not found.
+  def adaptor_locate_left(adaptor)
+    pos = adaptor.length
+
+    while pos > 0
+      len        = pos
+      subseq     = self.seq[0 ... len].upcase
+      subadaptor = adaptor[adaptor.length - len ... adaptor.length].upcase
+
+      pos -= 1
+
+      return pos if subseq == subadaptor
+    end
+    
+    -1
+  end
+
+  # Method that locates an adaptor or part thereof in the sequence
+  # of a Seq object beginning from the right and removes the adaptor
+  # sequence if found.
+  def adaptor_clip_right(adaptor)
+    pos = self.adaptor_locate_right(adaptor)
+
+    if pos > 0
+      self.seq = self.seq[0 ... pos]
+    end
+  end
+
+  # Method that locates an adaptor or part thereof in the sequence
+  # of a Seq object beginning from the left and removes the adaptor
+  # sequence if found.
+  def adaptor_clip_left(adaptor)
+    pos = self.adaptor_locate_left(adaptor)
+
+    if pos > 0
+      self.seq = self.seq[pos + 1 ... self.seq.length]
+    end
+  end
+
   # Method to convert the quality scores from a specified base
   # to another base.
   def convert_phred2illumina!
@@ -412,9 +473,6 @@ end
 __END__
 
 
-
-
-
 # Class containing generic sequence methods and nucleic acid and amino acid subclasses.
 class Seq < String
   # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
@@ -432,24 +490,6 @@ class Seq < String
     end
   end
 
-  # Method to wrap a sequence to a given width using a given delimiter.
-  def wrap(width = 80, delimit = $/)
-    raise ArgumentError, "Cannot wrap sequence to negative width: #{width}." if width <= 0
-
-    self.delete!(" \t\n\r")
-    self.gsub(/.{#{width}}(?!$)/, "\\0#{delimit}")
-  end
-
-  # Method to wrap and replace a sequence to a given width using a given delimiter.
-  def wrap!(width = 80, delimit = $/)
-    self.replace(self.wrap(width, delimit))
-  end
-
-  # Method that replaces sequence with a randomly generated sequence of a given length.
-  def generate!(length)
-    self.replace(self.generate(length))
-  end
-
   # Class containing methods specific for amino acid (AA) sequences.
   class AA < Seq
     # Method that returns an array of amino acid residues.
index e1dfd87807c09816c07cf12bae633a6c3aad7a6b..86687386e3403e5f03772a9f01d48f5e399f3356 100755 (executable)
@@ -187,7 +187,7 @@ class TestSeq < Test::Unit::TestCase
   end
 
   def test_Seq_generate_with_ok_type_dont_raise
-    %w[ dna DNA rna RNA protein Protein ].each do |type|
+    %w[dna DNA rna RNA protein Protein].each do |type|
       assert_nothing_raised { @entry.generate(10, type) }
     end
   end
@@ -264,6 +264,38 @@ class TestSeq < Test::Unit::TestCase
     assert_equal(25.00, @entry.soft_mask)
   end
 
+  def test_Seq_adaptor_locate_right_returns_correctly
+    @entry.seq = "nnnnncgat"
+    assert_equal(-1, @entry.adaptor_locate_right("X"))
+    assert_equal(8,  @entry.adaptor_locate_right("TX"))
+    assert_equal(7,  @entry.adaptor_locate_right("ATX"))
+    assert_equal(6,  @entry.adaptor_locate_right("GATX"))
+    assert_equal(5,  @entry.adaptor_locate_right("CGATX"))
+    assert_equal(0,  @entry.adaptor_locate_right("NNNNNCGAT"))
+  end
+
+  def test_Seq_adaptor_locate_left_returns_correctly
+    @entry.seq = "cgatnnnnn"
+    assert_equal(-1, @entry.adaptor_locate_left("X"))
+    assert_equal(0,  @entry.adaptor_locate_left("XC"))
+    assert_equal(1,  @entry.adaptor_locate_left("XCG"))
+    assert_equal(2,  @entry.adaptor_locate_left("XCGA"))
+    assert_equal(3,  @entry.adaptor_locate_left("XCGAT"))
+    assert_equal(8,  @entry.adaptor_locate_left("CGATNNNNN"))
+  end
+
+  def test_Seq_adaptor_clip_right_returns_correct_sequence
+    @entry.seq = "nnnnncgat"
+    @entry.adaptor_clip_right("cgat")
+    assert_equal( "nnnnn", @entry.seq)
+  end
+
+  def test_Seq_adaptor_clip_left_returns_correct_sequence
+    @entry.seq = "cgatnnnnn"
+    @entry.adaptor_clip_left("cgat")
+    assert_equal( "nnnnn", @entry.seq)
+  end
+
   def test_Digest_new_raises_on_bad_pattern_residue
     assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
   end
@@ -281,242 +313,3 @@ end
 
 
 __END__
-
-class TestSeq < Test::Unit::TestCase 
-  # Testing Seq#guess_type
-
-  def test_guess_type_raise_if_no_sequence
-    s = Seq.new
-
-    assert_raise( ArgumentError ) { s.guess_type }
-  end
-
-  def test_guess_type_AA_uppercase
-    s1 = Seq.new( "SEQ" )
-    s2 = Seq::AA.new( "SEQ" )
-    assert_equal( s1.guess_type.class, s2.class )
-  end
-
-  def test_guess_type_AA_lowercase
-    s1 = Seq.new( "seq" )
-    s2 = Seq::AA.new( "seq" )
-
-    assert_equal( s1.guess_type.class, s2.class )
-  end
-
-  def test_guess_type_DNA_uppercase
-    s1 = Seq.new( "ATCG" )
-    s2 = Seq::NA::DNA.new( "ATCG" )
-
-    assert_equal( s1.guess_type.class, s2.class )
-  end
-
-  def test_guess_type_DNA_lowercase
-    s1 = Seq.new( "atcg" )
-    s2 = Seq::NA::DNA.new( "atcg" )
-
-    assert_equal( s1.guess_type.class, s2.class )
-  end
-
-  def test_guess_type_RNA_uppercase
-    s1 = Seq.new( "AUCG" )
-    s2 = Seq::NA::RNA.new( "AUCG" )
-
-    assert_equal( s1.guess_type.class, s2.class )
-  end
-
-  def test_guess_type_RNA_lowercase
-    s1 = Seq.new( "aucg" )
-    s2 = Seq::NA::RNA.new( "aucg" )
-
-    assert_equal( s1.guess_type.class, s2.class )
-  end
-
-  # Testing Seq#wrap
-
-  def test_wrap_arg_is_a_positive_number
-    s = Seq.new
-
-    assert_raise( ArgumentError ) { s.wrap( 0 ) }
-    assert_raise( ArgumentError ) { s.wrap( -10 ) }
-  end
-
-  def test_wrap_with_0_args
-    s = Seq.new( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACGACTACGACT" )
-
-    assert_equal( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACG\nACTACGACT", s.wrap.to_s )
-  end
-
-  def test_wrap_with_1_args
-    s = Seq.new( "ATCG" )
-
-    assert_equal( "AT\nCG", s.wrap( 2 ).to_s )
-  end
-
-  def test_wrap_with_2_args
-    s = Seq.new( "ATCG" )
-
-    assert_equal( "AT\rCG", s.wrap( 2, "\r" ).to_s )
-  end
-
-  def test_wrap_dont_change_instance_var
-    s = Seq.new( "ATCG" )
-
-    s.wrap( 2 )
-
-    assert_equal( "ATCG", s.to_s )
-  end
-
-  # Testing Seq#wrap!
-
-  def test_wrap_with_0_args!
-    s = Seq.new( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACGACTACGACT" )
-
-    s.wrap!
-
-    assert_equal( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACG\nACTACGACT", s.to_s )
-  end
-
-  def test_wrap_with_1_args!
-    s = Seq.new( "ATCG" )
-
-    s.wrap!( 2 )
-
-    assert_equal( "AT\nCG", s.to_s )
-  end
-
-  def test_wrap_with_2_args!
-    s = Seq.new( "ATCG" )
-
-    s.wrap!( 2, "\r" )
-
-    assert_equal( "AT\rCG", s.to_s )
-  end
-
-  # Testing Seq#generate
-
-  def test_generate_arg_is_a_positive_number
-    s = Seq.new
-
-    assert_raise( ArgumentError ) { s.generate( 0 ) }
-    assert_raise( ArgumentError ) { s.generate( -10 ) }
-  end
-
-  def test_generate
-    s = Seq::AA.new
-
-    seq = s.generate( 40 )
-
-    assert_equal( 40, seq.length )
-  end
-
-  def test_generate_dont_change_instance_var
-    s = Seq::AA.new
-
-    seq = s.generate( 40 )
-
-    assert_equal( "", s.to_s )
-  end
-
-  # Testing Seq#generate!
-
-  def test_generate!
-    s = Seq::AA.new
-
-    s.generate!( 40 )
-
-    assert_equal( 40, s.length )
-  end
-
-  # Testing Seq::AA#residues
-
-  def test_Seq_AA_residues
-    s = Seq::AA.new
-
-    assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.residues )
-  end
-
-  # Testing Seq::AA#mol_weight
-
-  def test_Seq_aa_mol_weight_bad_residue
-    s = Seq::AA.new( "7" )
-    assert_raise( ArgumentError ) { s.mol_weight }
-  end
-
-  def test_Seq_aa_mol_wight_return_correct_uppercase
-    s = Seq::AA.new( "SEQ" )
-    assert_equal( 398.0, s.mol_weight )
-  end
-
-  def test_Seq_aa_mol_wight_return_correct_lowercase
-    s = Seq::AA.new( "seq" )
-    assert_equal( 398.0, s.mol_weight )
-  end
-
-  # Testing Seq::NA::DNA#residues
-
-  def test_Seq_NA_DNA_residues
-    s = Seq::NA::DNA.new
-
-    assert_equal( %w{ A T C G }, s.residues )
-  end
-
-  # Testing Seq::NA::DNA#complement
-
-  def test_Seq_NA_DNA_complement_correct
-    s = Seq::NA::DNA.new( "ATCGatcg" )
-    assert_equal( "TAGCtagc", s.complement.to_s )
-  end
-
-  # Testing Seq::NA::DNA#to_RNA
-
-  def test_Seq_NA_DNA_to_RNA_returns_RNA_object
-    dna = Seq::NA::DNA.new( "ATCGatcg" )
-    rna = Seq::NA::RNA.new
-
-    new_rna = dna.to_RNA
-
-    assert_equal( rna.class, new_rna.class )
-  end
-
-  def test_Seq_NA_DNA_to_RNA_is_correct
-    dna = Seq::NA::DNA.new( "ATCGatcg" )
-    rna = dna.to_RNA
-
-    assert_equal( "AUCGaucg", rna.to_s )
-  end
-
-  # Testing Seq::NA::RNA#residues
-
-  def test_Seq_NA_RNA_residues
-    s = Seq::NA::RNA.new
-
-    assert_equal( %w{ A U C G }, s.residues )
-  end
-
-  # Testing Seq::NA::RNA#complement
-
-  def test_Seq_NA_RNA_complement_correct
-    s = Seq::NA::RNA.new( "AUCGaucg" )
-    assert_equal( "UAGCuagc", s.complement.to_s )
-  end
-
-  # Testing Seq::NA::RNA#to_DNA
-
-  def test_Seq_NA_RNA_to_DNA_returns_DNA_object
-    rna = Seq::NA::RNA.new( "AUCGaucg" )
-    dna = Seq::NA::DNA.new
-
-    new_dna = rna.to_DNA
-
-    assert_equal( dna.class, new_dna.class )
-  end
-
-  def test_Seq_NA_RNA_to_DNA_is_correct
-    rna = Seq::NA::RNA.new( "AUCGaucg" )
-    dna = rna.to_DNA
-
-    assert_equal( "ATCGatcg", dna.to_s )
-  end
-end
-