git.donarmstrong.com Git - biopieces.git/blobdiff - code_ruby/test/maasha/test_seq.rb
refactoring of revcomp in seq.rb
[biopieces.git] / code_ruby / test / maasha / test_seq.rb
index cda1f94045b0752b89deedf75329a7d37803cb94..8bc6852927235b939414d80a02329b3b5490def7 100755 (executable)
@@ -9,11 +9,21 @@ class TestSeq < Test::Unit::TestCase
     @entry = Seq.new
   end
 
-  #  def test_Seq# autoremoves whitespace, newlines, and carriage returns
+  #  # autoremoves whitespace, newlines, and carriage returns
+  #  def test_Seq_strip
   #    dna = Seq.new
   #    dna.seq = "A\tT\r\tC\nG  "
   #    assert_equal(dna.seq, "ATCG")
   #  end
+  
+  def test_Seq_new_bp_returns_correctly
+    record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"}
+    seq    = Seq.new_bp(record)
+    assert_equal("test", seq.seq_name)
+    assert_equal("ATCG", seq.seq)
+    assert_equal("dna",  seq.type)
+    assert_equal("hhhh", seq.qual)
+  end
 
   def test_Seq_is_dna_with_no_sequence_type_returns_false
     assert(@entry.is_dna? == false)
@@ -157,6 +167,28 @@ class TestSeq < Test::Unit::TestCase
     assert_raise(SeqError) { @entry.to_bp }
   end
 
+  def test_Seq_to_fasta_raises_on_missing_seq_name
+    @entry.seq = 'ATCG'
+    assert_raise(SeqError) { @entry.to_fasta }
+  end
+
+  def test_Seq_to_fasta_raises_on_empty_seq_name
+    @entry.seq_name = ''
+    @entry.seq      = 'ATCG'
+    assert_raise(SeqError) { @entry.to_fasta }
+  end
+
+  def test_Seq_to_fasta_raises_on_missing_seq
+    @entry.seq_name = 'test'
+    assert_raise(SeqError) { @entry.to_fasta }
+  end
+
+  def test_Seq_to_fasta_raises_on_empty_seq
+    @entry.seq_name = 'test'
+    @entry.seq      = ''
+    assert_raise(SeqError) { @entry.to_fasta }
+  end
+
   def test_Seq_to_fasta_returns_correct_entry
     @entry.seq_name = 'test'
     @entry.seq      = 'ATCG'
@@ -187,7 +219,15 @@ class TestSeq < Test::Unit::TestCase
 
   def test_Seq_reverse_returns_correctly
     @entry.seq = "ATCG"
-    assert_equal("GCTA", @entry.reverse)
+    new_entry  = @entry.reverse
+    assert_equal("GCTA", new_entry.seq)
+    assert_equal("ATCG", @entry.seq)
+  end
+
+  def test_Seq_reverse_bang_returns_correctly
+    @entry.seq = "ATCG"
+    @entry.reverse!
+    assert_equal("GCTA", @entry.seq)
   end
 
   def test_Seq_complement_raises_if_no_sequence
@@ -204,27 +244,43 @@ class TestSeq < Test::Unit::TestCase
   def test_Seq_complement_for_DNA_is_correct
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
-    assert_equal("TAGCtagc", @entry.complement)
+    comp        = @entry.complement
+    assert_equal("TAGCtagc", comp.seq)
+    assert_equal("ATCGatcg", @entry.seq)
   end
 
   def test_Seq_complement_for_RNA_is_correct
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
-    assert_equal("UAGCuagc", @entry.complement)
+    comp        = @entry.complement
+    assert_equal("UAGCuagc", comp.seq)
+    assert_equal("AUCGaucg", @entry.seq)
+  end
+
+  def test_Seq_complement_bang_raises_if_no_sequence
+    @entry.type = 'dna'
+    assert_raise(SeqError) { @entry.complement! }
   end
 
-  def test_Seq_reverse_complement_for_DNA_is_correct
+  def test_Seq_complement_bang_raises_on_bad_type
+    @entry.seq  = 'ATCG'
+    @entry.type = 'protein'
+    assert_raise(SeqError) { @entry.complement! }
+  end
+
+  def test_Seq_complement_bang_for_DNA_is_correct
     @entry.seq  = 'ATCGatcg'
     @entry.type = 'dna'
-    assert_equal("cgatCGAT", @entry.reverse_complement)
+    assert_equal("TAGCtagc", @entry.complement!.seq)
   end
 
-  def test_Seq_reverse_complement_for_RNA_is_correct
+  def test_Seq_complement_bang_for_RNA_is_correct
     @entry.seq  = 'AUCGaucg'
     @entry.type = 'rna'
-    assert_equal("cgauCGAU", @entry.reverse_complement)
+    assert_equal("UAGCuagc", @entry.complement!.seq)
   end
 
+
   def test_Seq_hamming_distance_returns_correctly
     seq1 = Seq.new("test1", "ATCG")
     seq2 = Seq.new("test2", "atgg")
@@ -246,14 +302,22 @@ class TestSeq < Test::Unit::TestCase
     end
   end
 
-  def test_Seq_subseq_with_start_lt_0_raises
-    @entry.seq = "ATCG"
-    assert_raise(SeqError) { @entry.subseq(-1, 1) }
+  def test_Seq_shuffle_returns_correctly
+    orig       = "actgactgactgatcgatcgatcgatcgtactg" 
+    @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
+    entry_shuf = @entry.shuffle
+    assert_equal(orig, @entry.seq)
+    assert_not_equal(@entry.seq, entry_shuf.seq)
+  end
+
+  def test_Seq_shuffle_bang_returns_correctly
+    @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
+    assert_not_equal(@entry.seq, @entry.shuffle!.seq)
   end
 
-  def test_Seq_subseq_with_length_lt_1_raises
+  def test_Seq_subseq_with_start_lt_0_raises
     @entry.seq = "ATCG"
-    assert_raise(SeqError) { @entry.subseq(0, 0) }
+    assert_raise(SeqError) { @entry.subseq(-1, 1) }
   end
 
   def test_Seq_subseq_with_start_plus_length_gt_seq_raises
@@ -292,11 +356,6 @@ class TestSeq < Test::Unit::TestCase
     assert_raise(SeqError) { @entry.subseq!(-1, 1) }
   end
 
-  def test_Seq_subseq_bang_with_length_lt_1_raises
-    @entry.seq = "ATCG"
-    assert_raise(SeqError) { @entry.subseq!(0, 0) }
-  end
-
   def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
     @entry.seq = "ATCG"
     assert_raise(SeqError) { @entry.subseq!(0, 5) }
@@ -347,6 +406,19 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("ATCG", @entry.subseq_rand(4).seq)
   end
 
+  def test_Seq_indels_remove_without_qual_returns_correctly
+    @entry.seq  = "A-T.CG~CG"
+    @entry.qual = nil
+    assert_equal("ATCGCG", @entry.indels_remove.seq)
+  end
+
+  def test_Seq_indels_remove_with_qual_returns_correctly
+    @entry.seq  = "A-T.CG~CG"
+    @entry.qual = "a@b@cd@fg"
+    assert_equal("ATCGCG", @entry.indels_remove.seq)
+    assert_equal("abcdfg", @entry.indels_remove.qual)
+  end
+
   def test_Seq_composition_returns_correctly
     @entry.seq = "AAAATTTCCG"
     assert_equal(4, @entry.composition["A"])
@@ -385,7 +457,268 @@ class TestSeq < Test::Unit::TestCase
     @entry.seq = "--AAAa"
     assert_equal(25.00, @entry.soft_mask)
   end
-end
 
+  def test_Seq_mask_seq_hard_bang_with_nil_seq_raises
+    @entry.seq  = nil
+    @entry.qual = ""
+
+    assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_with_nil_qual_raises
+    @entry.seq  = ""
+    @entry.qual = nil
+
+    assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises
+    assert_raise(SeqError) { @entry.mask_seq_hard!(-1) }
+    assert_raise(SeqError) { @entry.mask_seq_hard!(41) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise
+    @entry.seq  = "ATCG"
+    @entry.qual = "RSTU"
+
+    assert_nothing_raised { @entry.mask_seq_hard!(0) }
+    assert_nothing_raised { @entry.mask_seq_hard!(40) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_returns_correctly
+    @entry.seq  = "-ATCG"
+    @entry.qual = "RRSTU"
+
+    assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_nil_seq_raises
+    @entry.seq  = nil
+    @entry.qual = ""
+
+    assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_nil_qual_raises
+    @entry.seq  = ""
+    @entry.qual = nil
+
+    assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises
+    assert_raise(SeqError) { @entry.mask_seq_soft!(-1) }
+    assert_raise(SeqError) { @entry.mask_seq_soft!(41) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise
+    @entry.seq  = "ATCG"
+    @entry.qual = "RSTU"
+
+    assert_nothing_raised { @entry.mask_seq_soft!(0) }
+    assert_nothing_raised { @entry.mask_seq_soft!(40) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_returns_correctly
+    @entry.seq  = "-ATCG"
+    @entry.qual = "RRSTU"
+
+    assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
+  end
+
+  # qual score detection
+
+  def test_Seq_qual_base33_returns_correctly
+    # self.qual.match(/[!-:]/)
+    @entry.qual = '!"#$%&\'()*+,-./0123456789:'
+    assert_equal(true,  @entry.qual_base33? )
+    @entry.qual = 32.chr
+    assert_equal(false, @entry.qual_base33? )
+    @entry.qual = 59.chr
+    assert_equal(false, @entry.qual_base33? )
+  end
+
+  def test_Seq_qual_base64_returns_correctly
+    # self.qual.match(/[K-h]/)
+    @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
+    assert_equal(true,  @entry.qual_base64? )
+    @entry.qual = 74.chr
+    assert_equal(false, @entry.qual_base64? )
+    @entry.qual = 105.chr
+    assert_equal(false, @entry.qual_base64? )
+  end
+
+  def test_Seq_qual_valid_with_nil_qual_raises
+    assert_raise(SeqError) { @entry.qual_valid?("illumina1.8") }
+  end
+
+  def test_Seq_qual_valid_with_bad_encoding_raises
+    @entry.qual = "abc"
+    assert_raise(SeqError) { @entry.qual_valid?("foobar") }
+  end
+
+  def test_Seq_qual_valid_returns_correctly
+    tests = [["sanger",      0, 93, 33],
+             ["454",         0, 62, 64],
+             ["solexa",     -5, 62, 64],
+             ["illumina13",  0, 62, 64],
+             ["illumina15",  0, 62, 64],
+             ["illumina18",  0, 93, 33]]
+
+    tests.each do |test|
+      @entry.qual = (test[1] + test[-1]).chr + (test[2] + test[-1]).chr
+      assert_equal(true, @entry.qual_valid?(test[0]))
+      @entry.qual = (test[1] + test[-1] - 1).chr
+      assert_equal(false, @entry.qual_valid?(test[0]))
+      @entry.qual = (test[2] + test[-1] + 1).chr
+      assert_equal(false, @entry.qual_valid?(test[0]))
+    end
+  end
+
+  # convert sanger to ...
+
+  def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'illumina18').qual)
+  end
+
+  # convert solexa to ...
+
+  def test_Seq_convert_scores_bang_from_solexa_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'illumina18').qual)
+  end
+
+  # convert illumina13 to ...
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'illumina18').qual)
+  end
+
+  # convert illumina15 to ...
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'illumina18').qual)
+  end
+
+  # convert illumina18 to ...
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'illumina18').qual)
+  end
+
+  def test_Seq_scores_mean_without_qual_raises
+    @entry.qual = nil
+    assert_raise(SeqError) { @entry.scores_mean }
+  end
+
+  def test_Seq_scores_mean_returns_correctly
+    @entry.qual = '@@hh'
+    assert_equal(20.0, @entry.scores_mean)
+  end
+end
 
 __END__