7 class TestSeq < Test::Unit::TestCase
12 # def test_Seq# autoremoves whitespace, newlines, and carriage returns
14 # dna.seq = "A\tT\r\tC\nG "
15 # assert_equal("ATCG", dna.seq)
# Seq.new_bp must copy a record hash onto the matching accessors:
# :SEQ_NAME -> seq_name, :SEQ -> seq, :SEQ_TYPE -> type, :SCORES -> qual.
def test_Seq_new_bp_returns_correctly
  record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"}
  seq    = Seq.new_bp(record)

  assert_equal("test", seq.seq_name)
  assert_equal("ATCG", seq.seq)
  assert_equal("dna", seq.type)
  assert_equal("hhhh", seq.qual)
end
# No type is assigned to @entry in this test (@entry itself is prepared
# outside this method); is_dna? must then answer false.
def test_Seq_is_dna_with_no_sequence_type_returns_false
  # assert_equal reports the actual value on failure, unlike assert(x == false).
  assert_equal(false, @entry.is_dna?)
end
# Asserts that @entry.is_dna? is true.
# NOTE(review): listing line 32 is missing from this extract (presumably the
# line that gives @entry its type) — confirm against the full file.
31 def test_Seq_is_dna_with_dna_sequence_type_returns_true
33 assert(@entry.is_dna? == true)
# No type is assigned to @entry in this test (@entry itself is prepared
# outside this method); is_rna? must then answer false.
def test_Seq_is_rna_with_no_sequence_type_returns_false
  # assert_equal reports the actual value on failure, unlike assert(x == false).
  assert_equal(false, @entry.is_rna?)
end
# Asserts that @entry.is_rna? is true.
# NOTE(review): listing line 41 is missing from this extract (presumably the
# line that gives @entry its type) — confirm against the full file.
40 def test_Seq_is_rna_with_rna_sequence_type_returns_true
42 assert(@entry.is_rna? == true)
# No type is assigned to @entry in this test (@entry itself is prepared
# outside this method); is_protein? must then answer false.
def test_Seq_is_protein_with_no_sequence_type_returns_false
  # assert_equal reports the actual value on failure, unlike assert(x == false).
  assert_equal(false, @entry.is_protein?)
end
# Once the type is set to 'protein', is_protein? must answer true.
def test_Seq_is_protein_with_protein_sequence_type_returns_true
  @entry.type = 'protein'
  assert_equal(true, @entry.is_protein?)
end
# type_guess must raise SeqError when called on @entry without a sequence
# having been assigned in this test.
def test_Seq_type_guess_without_sequence_raises
  assert_raise(SeqError) { @entry.type_guess }
end
# type_guess must return 'protein' for the sequence 'atcatcrFgatcg'.
def test_Seq_type_guess_with_protein_returns_protein
  @entry.seq = 'atcatcrFgatcg'
  assert_equal('protein', @entry.type_guess)
end
# type_guess must return 'rna' for the sequence 'atcatcrUgatcg'.
def test_Seq_type_guess_with_rna_returns_rna
  @entry.seq = 'atcatcrUgatcg'
  assert_equal('rna', @entry.type_guess)
end
# type_guess must return 'dna' for the sequence 'atcatcgatcg'.
def test_Seq_type_guess_with_dna_returns_dna
  @entry.seq = 'atcatcgatcg'
  assert_equal('dna', @entry.type_guess)
end
# type_guess! must raise SeqError when called on @entry without a sequence
# having been assigned in this test.
def test_Seq_type_guess_EM_without_sequence_raises
  assert_raise(SeqError) { @entry.type_guess! }
end
# Sets seq to 'atcatcrFgatcg' and expects @entry.type to read 'protein'.
# NOTE(review): listing line 79 is missing from this extract (presumably the
# type_guess! call) — confirm against the full file.
77 def test_Seq_type_guess_EM_with_protein_returns_protein
78 @entry.seq = 'atcatcrFgatcg'
80 assert_equal('protein', @entry.type)
# Sets seq to 'atcatcrUgatcg' and expects @entry.type to read 'rna'.
# NOTE(review): listing line 85 is missing from this extract (presumably the
# type_guess! call) — confirm against the full file.
83 def test_Seq_type_guess_EM_with_rna_returns_rna
84 @entry.seq = 'atcatcrUgatcg'
86 assert_equal('rna', @entry.type)
# Sets seq to 'atcatcgatcg' and expects @entry.type to read 'dna'.
# NOTE(review): listing line 91 is missing from this extract (presumably the
# type_guess! call) — confirm against the full file.
89 def test_Seq_type_guess_EM_with_dna_returns_dna
90 @entry.seq = 'atcatcgatcg'
92 assert_equal('dna', @entry.type)
# Expects @entry.length to be 4.
# NOTE(review): listing line 96 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
95 def test_Seq_length_is_correct
97 assert_equal(4, @entry.length)
# indels must report 4 for 'ATCG.-~_' (four residues followed by the four
# characters '.', '-', '~' and '_').
def test_Seq_indels_is_correct
  @entry.seq = 'ATCG.-~_'
  assert_equal(4, @entry.indels)
end
# Expects to_rna to raise SeqError.
# NOTE(review): listing line 106 is missing from this extract — confirm the
# set-up performed there against the full file.
105 def test_Seq_to_rna_raises_if_no_sequence
107 assert_raise(SeqError) { @entry.to_rna }
# Expects to_rna to raise SeqError.
# NOTE(review): listing lines 111-112 are missing from this extract — confirm
# the set-up performed there against the full file.
110 def test_Seq_to_rna_raises_on_bad_type
113 assert_raise(SeqError) { @entry.to_rna }
# Sets seq to 'ATCGatcg' and expects to_rna to return "AUCGaucg" (case kept).
# NOTE(review): listing line 118 is missing from this extract — confirm
# against the full file.
116 def test_Seq_to_rna_transcribes_correctly
117 @entry.seq = 'ATCGatcg'
119 assert_equal("AUCGaucg", @entry.to_rna)
# Sets seq to 'ATCGatcg' and expects @entry.type to read "rna" afterwards.
# NOTE(review): listing lines 124-125 are missing from this extract
# (presumably including the to_rna call) — confirm against the full file.
122 def test_Seq_to_rna_changes_entry_type_to_rna
123 @entry.seq = 'ATCGatcg'
126 assert_equal("rna", @entry.type)
# Expects to_dna to raise SeqError.
# NOTE(review): listing line 130 is missing from this extract — confirm the
# set-up performed there against the full file.
129 def test_Seq_to_dna_raises_if_no_sequence
131 assert_raise(SeqError) { @entry.to_dna }
# Expects to_dna to raise SeqError.
# NOTE(review): listing lines 135-136 are missing from this extract — confirm
# the set-up performed there against the full file.
134 def test_Seq_to_dna_raises_on_bad_type
137 assert_raise(SeqError) { @entry.to_dna }
# Sets seq to 'AUCGaucg' and expects to_dna to return "ATCGatcg" (case kept).
# NOTE(review): listing line 142 is missing from this extract — confirm
# against the full file.
140 def test_Seq_to_dna_transcribes_correctly
141 @entry.seq = 'AUCGaucg'
143 assert_equal("ATCGatcg", @entry.to_dna)
# Sets seq to 'AUCGaucg' and expects @entry.type to read "dna" afterwards.
# NOTE(review): listing lines 148-149 are missing from this extract
# (presumably including the to_dna call) — confirm against the full file.
146 def test_Seq_to_dna_changes_entry_type_to_dna
147 @entry.seq = 'AUCGaucg'
150 assert_equal("dna", @entry.type)
# Sets seq_name to 'test' and expects to_bp to return a hash with :SEQ_NAME,
# :SEQ ("ATCG") and :SEQ_LEN (4).
# NOTE(review): listing line 155 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
153 def test_Seq_to_bp_returns_correct_record
154 @entry.seq_name = 'test'
156 assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
# Expects to_bp to raise SeqError.
# NOTE(review): listing line 160 is missing from this extract — confirm the
# set-up performed there against the full file.
159 def test_Seq_to_bp_raises_on_missing_seq_name
161 assert_raise(SeqError) { @entry.to_bp }
# With only seq_name assigned in this test, to_bp must raise SeqError.
def test_Seq_to_bp_raises_on_missing_sequence
  @entry.seq_name = 'test'
  assert_raise(SeqError) { @entry.to_bp }
end
# Expects to_fasta to raise SeqError.
# NOTE(review): listing line 170 is missing from this extract — confirm the
# set-up performed there against the full file.
169 def test_Seq_to_fasta_raises_on_missing_seq_name
171 assert_raise(SeqError) { @entry.to_fasta }
# Expects to_fasta to raise SeqError.
# NOTE(review): listing lines 175-176 are missing from this extract — confirm
# the set-up performed there against the full file.
174 def test_Seq_to_fasta_raises_on_empty_seq_name
177 assert_raise(SeqError) { @entry.to_fasta }
# With only seq_name assigned in this test, to_fasta must raise SeqError.
def test_Seq_to_fasta_raises_on_missing_seq
  @entry.seq_name = 'test'
  assert_raise(SeqError) { @entry.to_fasta }
end
# Sets seq_name to 'test' and expects to_fasta to raise SeqError.
# NOTE(review): listing line 187 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
185 def test_Seq_to_fasta_raises_on_empty_seq
186 @entry.seq_name = 'test'
188 assert_raise(SeqError) { @entry.to_fasta }
# Sets seq_name to 'test' and expects to_fasta to return ">test\nATCG\n".
# NOTE(review): listing line 193 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
191 def test_Seq_to_fasta_returns_correct_entry
192 @entry.seq_name = 'test'
194 assert_equal(">test\nATCG\n", @entry.to_fasta)
# to_fasta(2) must wrap the sequence lines at two residues:
# "ATCG" becomes "AT\nCG\n" under the ">test" header.
def test_Seq_to_fasta_wraps_correctly
  entry = Seq.new("test", "ATCG")
  assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
end
# Sets seq_name to 'test' and expects to_fastq to return
# "@test\nATCG\n+\nhhhh\n".
# NOTE(review): listing lines 204-205 are missing from this extract
# (presumably the seq and qual assignments) — confirm against the full file.
202 def test_Seq_to_fastq_returns_correct_entry
203 @entry.seq_name = 'test'
206 assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
# to_key must raise SeqError for the sequence "AUCG".
def test_Seq_to_key_with_bad_residue_raises
  entry = Seq.new("test", "AUCG")
  assert_raise(SeqError) { entry.to_key }
end
# to_key must return 54 for the sequence "ATCG".
def test_Seq_to_key_returns_correctly
  entry = Seq.new("test", "ATCG")
  assert_equal(54, entry.to_key)
end
# Expects @entry.reverse.seq to be "GCTA".
# NOTE(review): listing line 220 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
219 def test_Seq_reverse_returns_correctly
221 assert_equal("GCTA", @entry.reverse.seq)
# Expects complement to raise SeqError.
# NOTE(review): listing line 225 is missing from this extract — confirm the
# set-up performed there against the full file.
224 def test_Seq_complement_raises_if_no_sequence
226 assert_raise(SeqError) { @entry.complement }
# Sets the type to 'protein' and expects complement to raise SeqError.
# NOTE(review): listing line 230 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
229 def test_Seq_complement_raises_on_bad_type
231 @entry.type = 'protein'
232 assert_raise(SeqError) { @entry.complement }
# Sets seq to 'ATCGatcg' and expects complement to return "TAGCtagc".
# NOTE(review): listing line 237 is missing from this extract (presumably the
# type assignment) — confirm against the full file.
235 def test_Seq_complement_for_DNA_is_correct
236 @entry.seq = 'ATCGatcg'
238 assert_equal("TAGCtagc", @entry.complement)
# Sets seq to 'AUCGaucg' and expects complement to return "UAGCuagc".
# NOTE(review): listing line 243 is missing from this extract (presumably the
# type assignment) — confirm against the full file.
241 def test_Seq_complement_for_RNA_is_correct
242 @entry.seq = 'AUCGaucg'
244 assert_equal("UAGCuagc", @entry.complement)
# Sets seq to 'ATCGatcg' and expects reverse_complement.seq to be "cgatCGAT".
# NOTE(review): listing line 249 is missing from this extract (presumably the
# type assignment) — confirm against the full file.
247 def test_Seq_reverse_complement_for_DNA_is_correct
248 @entry.seq = 'ATCGatcg'
250 assert_equal("cgatCGAT", @entry.reverse_complement.seq)
# Sets seq to 'AUCGaucg' and expects reverse_complement.seq to be "cgauCGAU".
# NOTE(review): listing line 255 is missing from this extract (presumably the
# type assignment) — confirm against the full file.
253 def test_Seq_reverse_complement_for_RNA_is_correct
254 @entry.seq = 'AUCGaucg'
256 assert_equal("cgauCGAU", @entry.reverse_complement.seq)
# hamming_distance between "ATCG" and "atgg" must be 1, i.e. the letter-case
# differences are not counted — only the C/g position is.
def test_Seq_hamming_distance_returns_correctly
  seq1 = Seq.new("test1", "ATCG")
  seq2 = Seq.new("test2", "atgg")
  assert_equal(1, seq1.hamming_distance(seq2))
end
# generate must raise SeqError for a negative length and for length 0.
def test_Seq_generate_with_length_lt_1_raises
  assert_raise(SeqError) { @entry.generate(-10, "dna") }
  assert_raise(SeqError) { @entry.generate(0, "dna") }
end
# generate must raise SeqError for the type "foo".
def test_Seq_generate_with_bad_type_raises
  assert_raise(SeqError) { @entry.generate(10, "foo") }
end
# generate must accept each of the listed type spellings (lower, upper and
# capitalised) without raising.
def test_Seq_generate_with_ok_type_dont_raise
  %w[dna DNA rna RNA protein Protein].each do |type|
    assert_nothing_raised { @entry.generate(10, type) }
  end
end
# Expects subseq(-1, 1) to raise SeqError.
# NOTE(review): listing line 281 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
280 def test_Seq_subseq_with_start_lt_0_raises
282 assert_raise(SeqError) { @entry.subseq(-1, 1) }
# Expects subseq(0, 5) to raise SeqError.
# NOTE(review): listing line 286 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
285 def test_Seq_subseq_with_start_plus_length_gt_seq_raises
287 assert_raise(SeqError) { @entry.subseq(0, 5) }
# Expects subseq(0, 2).seq == "AT" and subseq(2, 2).seq == "CG".
# NOTE(review): listing line 291 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
290 def test_Seq_subseq_returns_correct_sequence
292 assert_equal("AT", @entry.subseq(0, 2).seq)
293 assert_equal("CG", @entry.subseq(2, 2).seq)
# Expects subseq(0).seq == "ATCG" and subseq(2).seq == "CG" when no length
# argument is passed.
# NOTE(review): listing line 297 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
296 def test_Seq_subseq_without_len_returns_correct_sequence
298 assert_equal("ATCG", @entry.subseq(0).seq)
299 assert_equal("CG", @entry.subseq(2).seq)
# Expects subseq(0, 2).qual == "ab" and subseq(2, 2).qual == "cd".
# NOTE(review): listing lines 303-304 are missing from this extract
# (presumably the seq and qual assignments) — confirm against the full file.
302 def test_Seq_subseq_returns_correct_qual
305 assert_equal("ab", @entry.subseq(0, 2).qual)
306 assert_equal("cd", @entry.subseq(2, 2).qual)
# Expects subseq(0).qual == "abcd" and subseq(2).qual == "cd" when no length
# argument is passed.
# NOTE(review): listing lines 310-311 are missing from this extract
# (presumably the seq and qual assignments) — confirm against the full file.
309 def test_Seq_subseq_without_len_returns_correct_qual
312 assert_equal("abcd", @entry.subseq(0).qual)
313 assert_equal("cd", @entry.subseq(2).qual)
# Expects subseq!(-1, 1) to raise SeqError.
# NOTE(review): listing line 317 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
316 def test_Seq_subseq_bang_with_start_lt_0_raises
318 assert_raise(SeqError) { @entry.subseq!(-1, 1) }
# Expects subseq!(0, 5) to raise SeqError.
# NOTE(review): listing line 322 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
321 def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
323 assert_raise(SeqError) { @entry.subseq!(0, 5) }
# Checks @entry.seq twice: first expecting "AT", then "CG".
# NOTE(review): listing lines 327-328 and 330-331 are missing from this
# extract (presumably the seq assignments and subseq! calls that precede each
# assertion) — confirm against the full file.
326 def test_Seq_subseq_bang_returns_correct_sequence
329 assert_equal("AT", @entry.seq)
332 assert_equal("CG", @entry.seq)
# Checks @entry.seq twice: first expecting "ATCG", then "CG".
# NOTE(review): listing lines 336-337 and 339-340 are missing from this
# extract (presumably the seq assignments and subseq! calls that precede each
# assertion) — confirm against the full file.
335 def test_Seq_subseq_bang_without_len_returns_correct_sequence
338 assert_equal("ATCG", @entry.seq)
341 assert_equal("CG", @entry.seq)
# Checks @entry.qual twice: first expecting "ab", then "cd".
# NOTE(review): listing lines 345-347 and 349-351 are missing from this
# extract (presumably the seq/qual assignments and subseq! calls that precede
# each assertion) — confirm against the full file.
344 def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
348 assert_equal("ab", @entry.qual)
352 assert_equal("cd", @entry.qual)
# Checks @entry.qual twice: first expecting "abcd", then "cd".
# NOTE(review): listing lines 356-358 and 360-362 are missing from this
# extract (presumably the seq/qual assignments and subseq! calls that precede
# each assertion) — confirm against the full file.
355 def test_Seq_subseq_bang_with_pos_returns_correct_qual
359 assert_equal("abcd", @entry.qual)
363 assert_equal("cd", @entry.qual)
# Expects subseq_rand(4).seq to be "ATCG".
# NOTE(review): listing line 367 is missing from this extract (presumably the
# sequence assignment) — confirm against the full file.
366 def test_Seq_subseq_rand_returns_correct_sequence
368 assert_equal("ATCG", @entry.subseq_rand(4).seq)
# Sets seq to "A-T.CG~CG" and expects indels_remove.seq to be "ATCGCG".
# NOTE(review): listing line 373 is missing from this extract — confirm
# against the full file.
371 def test_Seq_indels_remove_without_qual_returns_correctly
372 @entry.seq = "A-T.CG~CG"
374 assert_equal("ATCGCG", @entry.indels_remove.seq)
# indels_remove must drop the '-', '.' and '~' positions from seq and the
# quality characters at the same positions (the '@' marks in the fixture).
def test_Seq_indels_remove_with_qual_returns_correctly
  @entry.seq  = "A-T.CG~CG"
  @entry.qual = "a@b@cd@fg"

  assert_equal("ATCGCG", @entry.indels_remove.seq)
  # Second call on purpose, as in the original test.
  assert_equal("abcdfg", @entry.indels_remove.qual)
end
# composition must map each residue of "AAAATTTCCG" to its count and answer
# 0 for a residue ("X") that does not occur.
def test_Seq_composition_returns_correctly
  @entry.seq = "AAAATTTCCG"

  # Same lookups, in the same order, as five separate assertions would make.
  {"A" => 4, "T" => 3, "C" => 2, "G" => 1, "X" => 0}.each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end
# Expects homopol_max to return 0.
# NOTE(review): listing line 394 is missing from this extract (presumably the
# empty-sequence assignment) — confirm against the full file.
393 def test_Seq_homopol_max_returns_0_with_empty_sequence
395 assert_equal(0, @entry.homopol_max)
# Expects homopol_max to return 0.
# NOTE(review): listing line 399 is missing from this extract (presumably the
# nil-sequence assignment) — confirm against the full file.
398 def test_Seq_homopol_max_returns_0_with_nil_sequence
400 assert_equal(0, @entry.homopol_max)
# homopol_max(6) must return 0 for "AtTcCcGggGnnNnn", whose longest
# same-letter run (ignoring case) is five characters.
def test_Seq_homopol_max_returns_0_when_not_found
  @entry.seq = "AtTcCcGggGnnNnn"
  assert_equal(0, @entry.homopol_max(6))
end
# homopol_max(3) must return 5 for "AtTcCcGggGnnNnn" — the mixed-case
# "nnNnn" run is the longest.
def test_Seq_homopol_max_returns_correctly
  @entry.seq = "AtTcCcGggGnnNnn"
  assert_equal(5, @entry.homopol_max(3))
end
# hard_mask must return 33.33 for "--AAAANn".
# NOTE(review): presumably the percentage of N/n among the six non-gap
# residues, rounded to two decimals — confirm against Seq#hard_mask.
def test_Seq_hard_mask_returns_correctly
  @entry.seq = "--AAAANn"
  assert_equal(33.33, @entry.hard_mask)
end
# soft_mask must return 25.00 for "--AAAa".
# NOTE(review): presumably the percentage of lower-case residues among the
# four non-gap residues — confirm against Seq#soft_mask.
def test_Seq_soft_mask_returns_correctly
  @entry.seq = "--AAAa"
  assert_equal(25.00, @entry.soft_mask)
end
# Expects mask_seq_hard!(20) to raise SeqError.
# NOTE(review): listing lines 424-426 are missing from this extract — confirm
# the set-up performed there against the full file.
423 def test_Seq_mask_seq_hard_bang_with_nil_seq_raises
427 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# Expects mask_seq_hard!(20) to raise SeqError.
# NOTE(review): listing lines 431-433 are missing from this extract — confirm
# the set-up performed there against the full file.
430 def test_Seq_mask_seq_hard_bang_with_nil_qual_raises
434 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# mask_seq_hard! must raise SeqError for the cutoffs -1 and 41.
def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises
  assert_raise(SeqError) { @entry.mask_seq_hard!(-1) }
  assert_raise(SeqError) { @entry.mask_seq_hard!(41) }
end
# Expects mask_seq_hard! to accept the cutoffs 0 and 40 without raising.
# NOTE(review): listing lines 443-445 are missing from this extract
# (presumably the seq/qual set-up) — confirm against the full file.
442 def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise
446 assert_nothing_raised { @entry.mask_seq_hard!(0) }
447 assert_nothing_raised { @entry.mask_seq_hard!(40) }
# Sets qual to "RRSTU" and expects mask_seq_hard!(20).seq to be "-NNCG".
# NOTE(review): listing lines 451 and 453 are missing from this extract
# (presumably the seq assignment and further set-up) — confirm against the
# full file.
450 def test_Seq_mask_seq_hard_bang_returns_correctly
452 @entry.qual = "RRSTU"
454 assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
# Expects mask_seq_soft!(20) to raise SeqError.
# NOTE(review): listing lines 458-460 are missing from this extract — confirm
# the set-up performed there against the full file.
457 def test_Seq_mask_seq_soft_bang_with_nil_seq_raises
461 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# Expects mask_seq_soft!(20) to raise SeqError.
# NOTE(review): listing lines 465-467 are missing from this extract — confirm
# the set-up performed there against the full file.
464 def test_Seq_mask_seq_soft_bang_with_nil_qual_raises
468 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# mask_seq_soft! must raise SeqError for the cutoffs -1 and 41.
def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises
  assert_raise(SeqError) { @entry.mask_seq_soft!(-1) }
  assert_raise(SeqError) { @entry.mask_seq_soft!(41) }
end
# Expects mask_seq_soft! to accept the cutoffs 0 and 40 without raising.
# NOTE(review): listing lines 477-479 are missing from this extract
# (presumably the seq/qual set-up) — confirm against the full file.
476 def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise
480 assert_nothing_raised { @entry.mask_seq_soft!(0) }
481 assert_nothing_raised { @entry.mask_seq_soft!(40) }
# Sets qual to "RRSTU" and expects mask_seq_soft!(20).seq to be "-atCG".
# NOTE(review): listing lines 485 and 487 are missing from this extract
# (presumably the seq assignment and further set-up) — confirm against the
# full file.
484 def test_Seq_mask_seq_soft_bang_returns_correctly
486 @entry.qual = "RRSTU"
488 assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
491 # qual score detection
# Expects qual_base33? to be true for a quality string made of the characters
# '!' through ':' and false for two further cases.
# NOTE(review): listing lines 497 and 499 are missing from this extract
# (presumably the qual reassignments behind the two `false` assertions) —
# confirm against the full file.
493 def test_Seq_qual_base33_returns_correctly
494 # self.qual.match(/[!-:]/)
495 @entry.qual = '!"#$%&\'()*+,-./0123456789:'
496 assert_equal(true, @entry.qual_base33? )
498 assert_equal(false, @entry.qual_base33? )
500 assert_equal(false, @entry.qual_base33? )
# Expects qual_base64? to be true for a quality string made of the characters
# 'K' through 'h', and false for 105.chr (the character right after 'h') and
# for one further case.
# NOTE(review): listing line 507 is missing from this extract (presumably the
# qual reassignment behind the first `false` assertion) — confirm against the
# full file.
503 def test_Seq_qual_base64_returns_correctly
504 # self.qual.match(/[K-h]/)
505 @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
506 assert_equal(true, @entry.qual_base64? )
508 assert_equal(false, @entry.qual_base64? )
509 @entry.qual = 105.chr
510 assert_equal(false, @entry.qual_base64? )
# qual_valid? must raise SeqError when no qual has been assigned in this test.
def test_Seq_qual_valid_with_nil_qual_raises
  # Use an encoding name the other tests show to be accepted ("illumina18"),
  # so the expected SeqError cannot be satisfied by an unknown-encoding error
  # instead of the missing qual.
  assert_raise(SeqError) { @entry.qual_valid?("illumina18") }
end
# Expects qual_valid?("foobar") to raise SeqError.
# NOTE(review): listing line 518 is missing from this extract (presumably a
# qual assignment) — confirm against the full file.
517 def test_Seq_qual_valid_with_bad_encoding_raises
519 assert_raise(SeqError) { @entry.qual_valid?("foobar") }
# Table-driven check of qual_valid?. Judging by how the assertions index each
# row, the layout is [encoding name, lowest score, highest score, ASCII offset]:
# a string built from both boundary scores must be valid, one character below
# the lowest or above the highest must be invalid.
# NOTE(review): listing lines 524 (a table row), 529-530 (presumably the
# `tests.each do |test|` iteration that introduces `test`) and the closing
# lines are missing from this extract — confirm against the full file.
522 def test_Seq_qual_valid_returns_correctly
523 tests = [["sanger", 0, 40, 33],
525 ["solexa", -5, 40, 64],
526 ["illumina13", 0, 40, 64],
527 ["illumina15", 0, 40, 64],
528 ["illumina18", 0, 41, 33]]
531 @entry.qual = (test[1] + test[-1]).chr + (test[2] + test[-1]).chr
532 assert_equal(true, @entry.qual_valid?(test[0]))
533 @entry.qual = (test[1] + test[-1] - 1).chr
534 assert_equal(false, @entry.qual_valid?(test[0]))
535 @entry.qual = (test[2] + test[-1] + 1).chr
536 assert_equal(false, @entry.qual_valid?(test[0]))
540 # convert sanger to ...
# sanger -> sanger: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'sanger').qual)
end
# sanger -> solexa: 'BCDEFGHI' must become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_sanger_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'solexa').qual)
end
# sanger -> illumina13: 'BCDEFGHI' must become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_sanger_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina13').qual)
end
# sanger -> illumina15: 'BCDEFGHI' must become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_sanger_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina15').qual)
end
# sanger -> illumina18: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_sanger_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'illumina18').qual)
end
567 # convert solexa to ...
# solexa -> sanger: 'BCDEFGHI' must become the string %q[#$%&'()*].
def test_Seq_convert_scores_bang_from_solexa_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'sanger').qual)
end
# solexa -> solexa: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_solexa_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'solexa').qual)
end
# solexa -> illumina13: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_solexa_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina13').qual)
end
# solexa -> illumina15: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_solexa_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina15').qual)
end
# solexa -> illumina18: 'BCDEFGHI' must become the string %q[#$%&'()*].
def test_Seq_convert_scores_bang_from_solexa_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'illumina18').qual)
end
594 # convert illumina13 to ...
# illumina13 -> sanger: 'BCDEFGHI' must become the string %q[#$%&'()*].
def test_Seq_convert_scores_bang_from_illumina13_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'sanger').qual)
end
# illumina13 -> solexa: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina13_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'solexa').qual)
end
# illumina13 -> illumina13: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina13_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina13').qual)
end
# illumina13 -> illumina15: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina13_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina15').qual)
end
# illumina13 -> illumina18: 'BCDEFGHI' must become the string %q[#$%&'()*].
def test_Seq_convert_scores_bang_from_illumina13_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'illumina18').qual)
end
621 # convert illumina15 to ...
# illumina15 -> sanger: 'BCDEFGHI' must become the string %q[#$%&'()*].
def test_Seq_convert_scores_bang_from_illumina15_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'sanger').qual)
end
# illumina15 -> solexa: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina15_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'solexa').qual)
end
# illumina15 -> illumina13: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina15_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina13').qual)
end
# illumina15 -> illumina15: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina15_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina15').qual)
end
# illumina15 -> illumina18: 'BCDEFGHI' must become the string %q[#$%&'()*].
def test_Seq_convert_scores_bang_from_illumina15_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'illumina18').qual)
end
648 # convert illumina18 to ...
# illumina18 -> sanger: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina18_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'sanger').qual)
end
# illumina18 -> solexa: 'BCDEFGHI' must become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_illumina18_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'solexa').qual)
end
# illumina18 -> illumina13: 'BCDEFGHI' must become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_illumina18_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina13').qual)
end
# illumina18 -> illumina15: 'BCDEFGHI' must become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_illumina18_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina15').qual)
end
# illumina18 -> illumina18: 'BCDEFGHI' must come back unchanged.
def test_Seq_convert_scores_bang_from_illumina18_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'illumina18').qual)
end
# Expects scores_mean to raise SeqError.
# NOTE(review): listing line 676 is missing from this extract — confirm the
# set-up performed there against the full file.
675 def test_Seq_scores_mean_without_qual_raises
677 assert_raise(SeqError) { @entry.scores_mean }
680 def test_Seq_scores_mean_returns_correctly
682 assert_equal(20.0, @entry.scores_mean)