7 class TestSeq < Test::Unit::TestCase
12 # # autoremoves whitespace, newlines, and carriage returns
15 # dna.seq = "A\tT\r\tC\nG "
16 # assert_equal(dna.seq, "ATCG")
# Seq.new_bp should map a record hash onto a Seq object:
# :SEQ_NAME -> seq_name, :SEQ -> seq, :SEQ_TYPE -> type, :SCORES -> qual.
19 def test_Seq_new_bp_returns_correctly
20 record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"}
21 seq = Seq.new_bp(record)
22 assert_equal("test", seq.seq_name)
23 assert_equal("ATCG", seq.seq)
24 assert_equal("dna", seq.type)
25 assert_equal("hhhh", seq.qual)
# is_dna? must be false when this test assigns no sequence type.
# NOTE(review): @entry is created outside the visible lines (presumably in a setup method) - confirm.
28 def test_Seq_is_dna_with_no_sequence_type_returns_false
29 assert(@entry.is_dna? == false)
# is_dna? must be true once the entry is typed as DNA.
# NOTE(review): the embedded numbering skips a line here; the statement that
# sets the type (presumably to 'dna') is not visible - confirm.
32 def test_Seq_is_dna_with_dna_sequence_type_returns_true
34 assert(@entry.is_dna? == true)
# is_rna? must be false when this test assigns no sequence type.
37 def test_Seq_is_rna_with_no_sequence_type_returns_false
38 assert(@entry.is_rna? == false)
# is_rna? must be true once the entry is typed as RNA.
# NOTE(review): the embedded numbering skips a line here; the statement that
# sets the type (presumably to 'rna') is not visible - confirm.
41 def test_Seq_is_rna_with_rna_sequence_type_returns_true
43 assert(@entry.is_rna? == true)
# is_protein? must be false when this test assigns no sequence type.
46 def test_Seq_is_protein_with_no_sequence_type_returns_false
47 assert(@entry.is_protein? == false)
# is_protein? must be true after the type is set to 'protein'.
50 def test_Seq_is_protein_with_protein_sequence_type_returns_true
51 @entry.type = 'protein'
52 assert_equal(true, @entry.is_protein?)
# type_guess must raise SeqError when this test assigns no sequence.
55 def test_Seq_type_guess_without_sequence_raises
56 assert_raise(SeqError) { @entry.type_guess }
# type_guess should return 'protein' for a sequence containing 'F'
# (the only residue here that differs from the DNA fixture apart from 'r').
59 def test_Seq_type_guess_with_protein_returns_protein
60 @entry.seq = 'atcatcrFgatcg'
61 assert_equal('protein', @entry.type_guess)
# type_guess should return 'rna' for a sequence containing 'U'.
64 def test_Seq_type_guess_with_rna_returns_rna
65 @entry.seq = 'atcatcrUgatcg'
66 assert_equal('rna', @entry.type_guess)
# type_guess should return 'dna' for a sequence made only of a/t/c/g.
69 def test_Seq_type_guess_with_dna_returns_dna
70 @entry.seq = 'atcatcgatcg'
71 assert_equal('dna', @entry.type_guess)
# type_guess! must raise SeqError when this test assigns no sequence.
74 def test_Seq_type_guess_EM_without_sequence_raises
75 assert_raise(SeqError) { @entry.type_guess! }
# After guessing in place, the entry's type should read 'protein'.
# NOTE(review): the embedded numbering skips a line between the assignment and
# the assertion; presumably the type_guess! call - confirm.
78 def test_Seq_type_guess_EM_with_protein_returns_protein
79 @entry.seq = 'atcatcrFgatcg'
81 assert_equal('protein', @entry.type)
# After guessing in place, the entry's type should read 'rna'.
# NOTE(review): the embedded numbering skips a line between the assignment and
# the assertion; presumably the type_guess! call - confirm.
84 def test_Seq_type_guess_EM_with_rna_returns_rna
85 @entry.seq = 'atcatcrUgatcg'
87 assert_equal('rna', @entry.type)
# After guessing in place, the entry's type should read 'dna'.
# NOTE(review): the embedded numbering skips a line between the assignment and
# the assertion; presumably the type_guess! call - confirm.
90 def test_Seq_type_guess_EM_with_dna_returns_dna
91 @entry.seq = 'atcatcgatcg'
93 assert_equal('dna', @entry.type)
# length should report 4.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); presumably a 4-residue sequence such as 'ATCG' - confirm.
96 def test_Seq_length_is_correct
98 assert_equal(4, @entry.length)
# indels should count the four gap characters '.', '-', '~' and '_' in 'ATCG.-~_'.
101 def test_Seq_indels_is_correct
102 @entry.seq = 'ATCG.-~_'
103 assert_equal(4, @entry.indels)
# to_rna must raise SeqError when the entry has no sequence.
# NOTE(review): one setup line is not visible here (numbering skips) - confirm what it sets.
106 def test_Seq_to_rna_raises_if_no_sequence
108 assert_raise(SeqError) { @entry.to_rna }
# to_rna must raise SeqError when the entry's type does not allow transcription.
# NOTE(review): two setup lines are not visible here (numbering skips);
# presumably they assign a sequence and a non-DNA type - confirm.
111 def test_Seq_to_rna_raises_on_bad_type
114 assert_raise(SeqError) { @entry.to_rna }
# to_rna should turn T/t into U/u and keep the case of every residue.
# NOTE(review): one line between the assignment and the assertion is not
# visible (numbering skips); presumably it sets the type to DNA - confirm.
117 def test_Seq_to_rna_transcribes_correctly
118 @entry.seq = 'ATCGatcg'
120 assert_equal("AUCGaucg", @entry.to_rna)
# After transcription the entry's type should read "rna".
# NOTE(review): two lines before the assertion are not visible (numbering
# skips); presumably the type assignment and the to_rna call - confirm.
123 def test_Seq_to_rna_changes_entry_type_to_rna
124 @entry.seq = 'ATCGatcg'
127 assert_equal("rna", @entry.type)
# to_dna must raise SeqError when the entry has no sequence.
# NOTE(review): one setup line is not visible here (numbering skips) - confirm what it sets.
130 def test_Seq_to_dna_raises_if_no_sequence
132 assert_raise(SeqError) { @entry.to_dna }
# to_dna must raise SeqError when the entry's type does not allow reverse transcription.
# NOTE(review): two setup lines are not visible here (numbering skips);
# presumably they assign a sequence and a non-RNA type - confirm.
135 def test_Seq_to_dna_raises_on_bad_type
138 assert_raise(SeqError) { @entry.to_dna }
# to_dna should turn U/u into T/t and keep the case of every residue.
# NOTE(review): one line between the assignment and the assertion is not
# visible (numbering skips); presumably it sets the type to RNA - confirm.
141 def test_Seq_to_dna_transcribes_correctly
142 @entry.seq = 'AUCGaucg'
144 assert_equal("ATCGatcg", @entry.to_dna)
# After reverse transcription the entry's type should read "dna".
# NOTE(review): two lines before the assertion are not visible (numbering
# skips); presumably the type assignment and the to_dna call - confirm.
147 def test_Seq_to_dna_changes_entry_type_to_dna
148 @entry.seq = 'AUCGaucg'
151 assert_equal("dna", @entry.type)
# to_bp should return a hash with :SEQ_NAME, :SEQ and :SEQ_LEN keys.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); the expected hash implies it is 'ATCG' - confirm.
154 def test_Seq_to_bp_returns_correct_record
155 @entry.seq_name = 'test'
157 assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
# to_bp must raise SeqError when no sequence name is set.
# NOTE(review): one setup line is not visible here (numbering skips);
# presumably it assigns only the sequence - confirm.
160 def test_Seq_to_bp_raises_on_missing_seq_name
162 assert_raise(SeqError) { @entry.to_bp }
# to_bp must raise SeqError when a name is set but this test assigns no sequence.
165 def test_Seq_to_bp_raises_on_missing_sequence
166 @entry.seq_name = 'test'
167 assert_raise(SeqError) { @entry.to_bp }
# to_fasta must raise SeqError when no sequence name is set.
# NOTE(review): one setup line is not visible here (numbering skips);
# presumably it assigns only the sequence - confirm.
170 def test_Seq_to_fasta_raises_on_missing_seq_name
172 assert_raise(SeqError) { @entry.to_fasta }
# to_fasta must raise SeqError when the sequence name is empty.
# NOTE(review): two setup lines are not visible here (numbering skips);
# presumably an empty seq_name and a sequence are assigned - confirm.
175 def test_Seq_to_fasta_raises_on_empty_seq_name
178 assert_raise(SeqError) { @entry.to_fasta }
# to_fasta must raise SeqError when a name is set but this test assigns no sequence.
181 def test_Seq_to_fasta_raises_on_missing_seq
182 @entry.seq_name = 'test'
183 assert_raise(SeqError) { @entry.to_fasta }
# to_fasta must raise SeqError when the sequence is empty.
# NOTE(review): one line before the assertion is not visible (numbering
# skips); presumably it assigns an empty sequence - confirm.
186 def test_Seq_to_fasta_raises_on_empty_seq
187 @entry.seq_name = 'test'
189 assert_raise(SeqError) { @entry.to_fasta }
# to_fasta should emit ">name\nsequence\n" when called without a wrap width.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); the expected output implies it is 'ATCG' - confirm.
192 def test_Seq_to_fasta_returns_correct_entry
193 @entry.seq_name = 'test'
195 assert_equal(">test\nATCG\n", @entry.to_fasta)
# to_fasta(2) should break the sequence into lines of two residues each.
198 def test_Seq_to_fasta_wraps_correctly
199 entry = Seq.new("test", "ATCG")
200 assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
# to_fastq should emit the four-line record "@name\nsequence\n+\nscores\n".
# NOTE(review): two setup lines are not visible (numbering skips); the expected
# output implies seq 'ATCG' and qual 'hhhh' are assigned there - confirm.
203 def test_Seq_to_fastq_returns_correct_entry
204 @entry.seq_name = 'test'
207 assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
# to_key must raise SeqError for "AUCG"; 'U' is the only residue that differs
# from the sequence accepted in the companion test.
210 def test_Seq_to_key_with_bad_residue_raises
211 entry = Seq.new("test", "AUCG")
212 assert_raise(SeqError) { entry.to_key }
# to_key should return 54 for "ATCG".
# NOTE(review): 54 == 0b00110110, which is consistent with a 2-bit-per-base
# packing (A=00, T=11, C=01, G=10) - presumably; confirm against Seq#to_key.
215 def test_Seq_to_key_returns_correctly
216 entry = Seq.new("test", "ATCG")
217 assert_equal(54, entry.to_key)
# reverse should return a new entry with the reversed sequence and leave
# the receiver's sequence untouched.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); the second assertion implies it is 'ATCG' - confirm.
220 def test_Seq_reverse_returns_correctly
222 new_entry = @entry.reverse
223 assert_equal("GCTA", new_entry.seq)
224 assert_equal("ATCG", @entry.seq)
# The in-place reverse should leave the receiver holding "GCTA".
# NOTE(review): two lines before the assertion are not visible (numbering
# skips); presumably the sequence assignment and the reverse! call - confirm.
227 def test_Seq_reverse_bang_returns_correctly
230 assert_equal("GCTA", @entry.seq)
# complement must raise SeqError when the entry has no sequence.
# NOTE(review): one setup line is not visible here (numbering skips) - confirm what it sets.
233 def test_Seq_complement_raises_if_no_sequence
235 assert_raise(SeqError) { @entry.complement }
# complement must raise SeqError for an entry typed as 'protein'.
# NOTE(review): one line before the type assignment is not visible (numbering
# skips); presumably it assigns a sequence - confirm.
238 def test_Seq_complement_raises_on_bad_type
240 @entry.type = 'protein'
241 assert_raise(SeqError) { @entry.complement }
# complement should return a new entry with A<->T and C<->G swapped (case
# preserved) and leave the receiver's sequence untouched.
# NOTE(review): one line after the assignment is not visible (numbering
# skips); presumably it sets the type to DNA - confirm.
244 def test_Seq_complement_for_DNA_is_correct
245 @entry.seq = 'ATCGatcg'
247 comp = @entry.complement
248 assert_equal("TAGCtagc", comp.seq)
249 assert_equal("ATCGatcg", @entry.seq)
# complement should return a new entry with A<->U and C<->G swapped (case
# preserved) and leave the receiver's sequence untouched.
# NOTE(review): one line after the assignment is not visible (numbering
# skips); presumably it sets the type to RNA - confirm.
252 def test_Seq_complement_for_RNA_is_correct
253 @entry.seq = 'AUCGaucg'
255 comp = @entry.complement
256 assert_equal("UAGCuagc", comp.seq)
257 assert_equal("AUCGaucg", @entry.seq)
# complement! must raise SeqError when the entry has no sequence.
# NOTE(review): one setup line is not visible here (numbering skips) - confirm what it sets.
260 def test_Seq_complement_bang_raises_if_no_sequence
262 assert_raise(SeqError) { @entry.complement! }
# complement! must raise SeqError for an entry typed as 'protein'.
# NOTE(review): one line before the type assignment is not visible (numbering
# skips); presumably it assigns a sequence - confirm.
265 def test_Seq_complement_bang_raises_on_bad_type
267 @entry.type = 'protein'
268 assert_raise(SeqError) { @entry.complement! }
# complement! should return an object whose seq is the DNA complement
# (A<->T, C<->G, case preserved).
# NOTE(review): one line after the assignment is not visible (numbering
# skips); presumably it sets the type to DNA - confirm.
271 def test_Seq_complement_bang_for_DNA_is_correct
272 @entry.seq = 'ATCGatcg'
274 assert_equal("TAGCtagc", @entry.complement!.seq)
# complement! should return an object whose seq is the RNA complement
# (A<->U, C<->G, case preserved).
# NOTE(review): one line after the assignment is not visible (numbering
# skips); presumably it sets the type to RNA - confirm.
277 def test_Seq_complement_bang_for_RNA_is_correct
278 @entry.seq = 'AUCGaucg'
280 assert_equal("UAGCuagc", @entry.complement!.seq)
# hamming_distance between "ATCG" and "atgg" is expected to be 1, so the
# comparison must ignore case: only the third position (C vs g) differs.
284 def test_Seq_hamming_distance_returns_correctly
285 seq1 = Seq.new("test1", "ATCG")
286 seq2 = Seq.new("test2", "atgg")
287 assert_equal(1, seq1.hamming_distance(seq2))
# generate must raise SeqError for a negative length and for length 0.
290 def test_Seq_generate_with_length_lt_1_raises
291 assert_raise(SeqError) { @entry.generate(-10, "dna") }
292 assert_raise(SeqError) { @entry.generate(0, "dna") }
# generate must raise SeqError for the unrecognised type "foo".
295 def test_Seq_generate_with_bad_type_raises
296 assert_raise(SeqError) { @entry.generate(10, "foo") }
# generate must accept dna, rna and protein in the lower-, upper- and
# capitalised spellings listed below without raising.
299 def test_Seq_generate_with_ok_type_dont_raise
300 %w[dna DNA rna RNA protein Protein].each do |type|
301 assert_nothing_raised { @entry.generate(10, type) }
# shuffle should return a new entry whose sequence differs from the original
# and leave the receiver's sequence unchanged.
# NOTE(review): the inequality assertion assumes a shuffle never reproduces the
# input order; whether Seq#shuffle guarantees that is not visible here.
305 def test_Seq_shuffle_returns_correctly
306 orig = "actgactgactgatcgatcgatcgatcgtactg"
307 @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
308 entry_shuf = @entry.shuffle
309 assert_equal(orig, @entry.seq)
310 assert_not_equal(@entry.seq, entry_shuf.seq)
# Compares @entry.seq (evaluated first, before the call) with the seq of the
# object returned by shuffle!, and expects them to differ.
# NOTE(review): if shuffle! mutates the string object in place rather than
# assigning a new one, both arguments would refer to the same string - confirm.
313 def test_Seq_shuffle_bang_returns_correctly
314 @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
315 assert_not_equal(@entry.seq, @entry.shuffle!.seq)
# subseq must raise SeqError for a negative start position.
# NOTE(review): the line assigning the sequence is not visible (numbering skips) - confirm.
318 def test_Seq_subseq_with_start_lt_0_raises
320 assert_raise(SeqError) { @entry.subseq(-1, 1) }
# subseq must raise SeqError when start + length runs past the sequence end.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); presumably a 4-residue sequence, given the length of 5 - confirm.
323 def test_Seq_subseq_with_start_plus_length_gt_seq_raises
325 assert_raise(SeqError) { @entry.subseq(0, 5) }
# subseq(start, length) should return an entry holding that slice of the sequence.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); the expected slices imply 'ATCG' - confirm.
328 def test_Seq_subseq_returns_correct_sequence
330 assert_equal("AT", @entry.subseq(0, 2).seq)
331 assert_equal("CG", @entry.subseq(2, 2).seq)
# subseq(start) without a length should return everything from start to the end.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); the expected slices imply 'ATCG' - confirm.
334 def test_Seq_subseq_without_len_returns_correct_sequence
336 assert_equal("ATCG", @entry.subseq(0).seq)
337 assert_equal("CG", @entry.subseq(2).seq)
# subseq(start, length) should slice the quality string the same way as the sequence.
# NOTE(review): two setup lines are not visible (numbering skips); the expected
# slices imply qual 'abcd' on a 4-residue sequence - confirm.
340 def test_Seq_subseq_returns_correct_qual
343 assert_equal("ab", @entry.subseq(0, 2).qual)
344 assert_equal("cd", @entry.subseq(2, 2).qual)
# subseq(start) without a length should keep the quality string from start to the end.
# NOTE(review): two setup lines are not visible (numbering skips); the expected
# slices imply qual 'abcd' on a 4-residue sequence - confirm.
347 def test_Seq_subseq_without_len_returns_correct_qual
350 assert_equal("abcd", @entry.subseq(0).qual)
351 assert_equal("cd", @entry.subseq(2).qual)
# subseq! must raise SeqError for a negative start position.
# NOTE(review): the line assigning the sequence is not visible (numbering skips) - confirm.
354 def test_Seq_subseq_bang_with_start_lt_0_raises
356 assert_raise(SeqError) { @entry.subseq!(-1, 1) }
# subseq! must raise SeqError when start + length runs past the sequence end.
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); presumably a 4-residue sequence, given the length of 5 - confirm.
359 def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
361 assert_raise(SeqError) { @entry.subseq!(0, 5) }
# After an in-place subseq the receiver should hold "AT", then "CG" in a second scenario.
# NOTE(review): the lines that assign the sequence and call subseq! before each
# assertion are not visible (numbering skips) - confirm the arguments used.
364 def test_Seq_subseq_bang_returns_correct_sequence
367 assert_equal("AT", @entry.seq)
370 assert_equal("CG", @entry.seq)
# After an in-place subseq without a length the receiver should hold "ATCG",
# then "CG" in a second scenario.
# NOTE(review): the lines that assign the sequence and call subseq! before each
# assertion are not visible (numbering skips) - confirm the arguments used.
373 def test_Seq_subseq_bang_without_len_returns_correct_sequence
376 assert_equal("ATCG", @entry.seq)
379 assert_equal("CG", @entry.seq)
# After an in-place subseq with position and length the receiver's qual should
# hold "ab", then "cd" in a second scenario.
# NOTE(review): the lines that assign seq/qual and call subseq! before each
# assertion are not visible (numbering skips) - confirm the arguments used.
382 def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
386 assert_equal("ab", @entry.qual)
390 assert_equal("cd", @entry.qual)
# After an in-place subseq with only a position the receiver's qual should
# hold "abcd", then "cd" in a second scenario.
# NOTE(review): the lines that assign seq/qual and call subseq! before each
# assertion are not visible (numbering skips) - confirm the arguments used.
393 def test_Seq_subseq_bang_with_pos_returns_correct_qual
397 assert_equal("abcd", @entry.qual)
401 assert_equal("cd", @entry.qual)
# subseq_rand(4) is expected to return "ATCG".
# NOTE(review): the line assigning the sequence is not visible (numbering
# skips); presumably 'ATCG', so the only length-4 subsequence is the whole
# sequence and the result is deterministic - confirm.
404 def test_Seq_subseq_rand_returns_correct_sequence
406 assert_equal("ATCG", @entry.subseq_rand(4).seq)
# indels_remove should strip '-', '.' and '~' from the sequence.
# NOTE(review): one line between the assignment and the assertion is not
# visible (numbering skips) - confirm what it sets.
409 def test_Seq_indels_remove_without_qual_returns_correctly
410 @entry.seq = "A-T.CG~CG"
412 assert_equal("ATCGCG", @entry.indels_remove.seq)
# indels_remove should drop the gap characters from seq and the quality
# characters at the same positions (the '@' entries below) from qual.
# indels_remove is called twice here; the second call must still yield the
# expected qual.
415 def test_Seq_indels_remove_with_qual_returns_correctly
416 @entry.seq = "A-T.CG~CG"
417 @entry.qual = "a@b@cd@fg"
418 assert_equal("ATCGCG", @entry.indels_remove.seq)
419 assert_equal("abcdfg", @entry.indels_remove.qual)
# composition should be indexable by residue and give per-residue counts,
# with 0 for a residue ("X") that does not occur in the sequence.
422 def test_Seq_composition_returns_correctly
423 @entry.seq = "AAAATTTCCG"
424 assert_equal(4, @entry.composition["A"])
425 assert_equal(3, @entry.composition["T"])
426 assert_equal(2, @entry.composition["C"])
427 assert_equal(1, @entry.composition["G"])
428 assert_equal(0, @entry.composition["X"])
# homopol_max should return 0 for an empty sequence.
# NOTE(review): the line assigning the (presumably empty) sequence is not
# visible (numbering skips) - confirm.
431 def test_Seq_homopol_max_returns_0_with_empty_sequence
433 assert_equal(0, @entry.homopol_max)
# homopol_max should return 0 for a nil sequence.
# NOTE(review): the line assigning the (presumably nil) sequence is not
# visible (numbering skips) - confirm.
436 def test_Seq_homopol_max_returns_0_with_nil_sequence
438 assert_equal(0, @entry.homopol_max)
# homopol_max(6) should return 0: the longest same-letter run in the fixture
# (ignoring case) is the five n's, which is shorter than 6.
441 def test_Seq_homopol_max_returns_0_when_not_found
442 @entry.seq = "AtTcCcGggGnnNnn"
443 assert_equal(0, @entry.homopol_max(6))
# homopol_max(3) should return 5, the length of the "nnNnn" run; the expected
# value implies runs are matched without regard to case.
446 def test_Seq_homopol_max_returns_correctly
447 @entry.seq = "AtTcCcGggGnnNnn"
448 assert_equal(5, @entry.homopol_max(3))
# hard_mask is expected to return 33.33 for "--AAAANn".
# NOTE(review): 2 of the 6 non-gap residues are N/n, so this is presumably a
# percentage of hard-masked residues rounded to two decimals - confirm.
451 def test_Seq_hard_mask_returns_correctly
452 @entry.seq = "--AAAANn"
453 assert_equal(33.33, @entry.hard_mask)
# soft_mask is expected to return 25.00 for "--AAAa".
# NOTE(review): 1 of the 4 non-gap residues is lower case, so this is presumably
# a percentage of soft-masked (lower-case) residues - confirm.
456 def test_Seq_soft_mask_returns_correctly
457 @entry.seq = "--AAAa"
458 assert_equal(25.00, @entry.soft_mask)
# mask_seq_hard! must raise SeqError when the sequence is nil.
# NOTE(review): the setup lines before the assertion are not visible
# (numbering skips); presumably seq is nil and qual is assigned - confirm.
461 def test_Seq_mask_seq_hard_bang_with_nil_seq_raises
465 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# mask_seq_hard! must raise SeqError when the quality string is nil.
# NOTE(review): the setup lines before the assertion are not visible
# (numbering skips); presumably seq is assigned and qual is nil - confirm.
468 def test_Seq_mask_seq_hard_bang_with_nil_qual_raises
472 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# mask_seq_hard! must raise SeqError for cutoffs -1 and 41, i.e. just outside
# the 0..40 range that the companion test expects to be accepted.
475 def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises
476 assert_raise(SeqError) { @entry.mask_seq_hard!(-1) }
477 assert_raise(SeqError) { @entry.mask_seq_hard!(41) }
# mask_seq_hard! must accept the boundary cutoffs 0 and 40 without raising.
# NOTE(review): the setup lines before the assertions are not visible
# (numbering skips); presumably seq and qual are assigned there - confirm.
480 def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise
484 assert_nothing_raised { @entry.mask_seq_hard!(0) }
485 assert_nothing_raised { @entry.mask_seq_hard!(40) }
# mask_seq_hard!(20) should replace low-quality residues with 'N': the expected
# "-NNCG" shows positions 2-3 masked, the leading '-' kept, and the last two kept.
# NOTE(review): the line assigning the sequence and one further line are not
# visible (numbering skips). With qual "RRSTU" the result is consistent with a
# base-64 reading (R=18, S=19 below the cutoff; T=20, U=21 kept) - confirm.
488 def test_Seq_mask_seq_hard_bang_returns_correctly
490 @entry.qual = "RRSTU"
492 assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
# mask_seq_soft! must raise SeqError when the sequence is nil.
# NOTE(review): the setup lines before the assertion are not visible
# (numbering skips); presumably seq is nil and qual is assigned - confirm.
495 def test_Seq_mask_seq_soft_bang_with_nil_seq_raises
499 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# mask_seq_soft! must raise SeqError when the quality string is nil.
# NOTE(review): the setup lines before the assertion are not visible
# (numbering skips); presumably seq is assigned and qual is nil - confirm.
502 def test_Seq_mask_seq_soft_bang_with_nil_qual_raises
506 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# mask_seq_soft! must raise SeqError for cutoffs -1 and 41, i.e. just outside
# the 0..40 range that the companion test expects to be accepted.
509 def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises
510 assert_raise(SeqError) { @entry.mask_seq_soft!(-1) }
511 assert_raise(SeqError) { @entry.mask_seq_soft!(41) }
# mask_seq_soft! must accept the boundary cutoffs 0 and 40 without raising.
# NOTE(review): the setup lines before the assertions are not visible
# (numbering skips); presumably seq and qual are assigned there - confirm.
514 def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise
518 assert_nothing_raised { @entry.mask_seq_soft!(0) }
519 assert_nothing_raised { @entry.mask_seq_soft!(40) }
# mask_seq_soft!(20) should lower-case low-quality residues: the expected
# "-atCG" shows positions 2-3 lower-cased, the leading '-' kept, the rest kept.
# NOTE(review): the line assigning the sequence and one further line are not
# visible (numbering skips). With qual "RRSTU" the result is consistent with a
# base-64 reading (R=18, S=19 below the cutoff; T=20, U=21 kept) - confirm.
522 def test_Seq_mask_seq_soft_bang_returns_correctly
524 @entry.qual = "RRSTU"
526 assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
529 # qual score detection
# qual_base33? should be true for a quality string made of the characters
# '!' through ':' and false for two further quality strings.
# NOTE(review): the assignments preceding the two `false` assertions are not
# visible (numbering skips) - confirm which values they use.
531 def test_Seq_qual_base33_returns_correctly
532 # self.qual.match(/[!-:]/)
533 @entry.qual = '!"#$%&\'()*+,-./0123456789:'
534 assert_equal(true, @entry.qual_base33? )
536 assert_equal(false, @entry.qual_base33? )
538 assert_equal(false, @entry.qual_base33? )
# qual_base64? should be true for a quality string made of the characters
# 'K' through 'h', and false for 105.chr ('i'), the first character past 'h'.
# NOTE(review): the assignment preceding the first `false` assertion is not
# visible (numbering skips) - confirm which value it uses.
541 def test_Seq_qual_base64_returns_correctly
542 # self.qual.match(/[K-h]/)
543 @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
544 assert_equal(true, @entry.qual_base64? )
546 assert_equal(false, @entry.qual_base64? )
547 @entry.qual = 105.chr
548 assert_equal(false, @entry.qual_base64? )
# qual_valid? must raise SeqError when this test assigns no quality string.
# NOTE(review): "illumina1.8" is not one of the encoding names used elsewhere
# in this file ("illumina18"), so the raise may come from the encoding name
# rather than from the nil qual - confirm which check is intended.
551 def test_Seq_qual_valid_with_nil_qual_raises
552 assert_raise(SeqError) { @entry.qual_valid?("illumina1.8") }
# qual_valid? must raise SeqError for the unknown encoding name "foobar".
# NOTE(review): one setup line is not visible here (numbering skips);
# presumably it assigns a quality string - confirm.
555 def test_Seq_qual_valid_with_bad_encoding_raises
557 assert_raise(SeqError) { @entry.qual_valid?("foobar") }
# Table-driven check of qual_valid? for each encoding. Each row is
# [encoding name, minimum score, maximum score, ASCII offset].
# For every row the test builds:
#   * a two-character qual at exactly min and max  -> expected valid
#   * a one-character qual one below the minimum   -> expected invalid
#   * a one-character qual one above the maximum   -> expected invalid
# NOTE(review): one table row and the loop header that binds `test` are not
# visible here (numbering skips) - confirm against the full file.
560 def test_Seq_qual_valid_returns_correctly
561 tests = [["sanger", 0, 93, 33],
563 ["solexa", -5, 62, 64],
564 ["illumina13", 0, 62, 64],
565 ["illumina15", 0, 62, 64],
566 ["illumina18", 0, 93, 33]]
569 @entry.qual = (test[1] + test[-1]).chr + (test[2] + test[-1]).chr
570 assert_equal(true, @entry.qual_valid?(test[0]))
571 @entry.qual = (test[1] + test[-1] - 1).chr
572 assert_equal(false, @entry.qual_valid?(test[0]))
573 @entry.qual = (test[2] + test[-1] + 1).chr
574 assert_equal(false, @entry.qual_valid?(test[0]))
578 # convert sanger to ...
# Converting sanger -> sanger should leave the quality string unchanged.
580 def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK
581 @entry.qual = 'BCDEFGHI'
582 assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'sanger').qual)
# Converting sanger -> solexa should shift every character up by 31 code
# points ('B' (66) -> 'a' (97)).
585 def test_Seq_convert_scores_bang_from_sanger_to_solexa_returns_OK
586 @entry.qual = 'BCDEFGHI'
587 assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'solexa').qual)
# Converting sanger -> illumina13 should shift every character up by 31 code
# points ('B' (66) -> 'a' (97)).
590 def test_Seq_convert_scores_bang_from_sanger_to_illumina13_returns_OK
591 @entry.qual = 'BCDEFGHI'
592 assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina13').qual)
# Converting sanger -> illumina15 should shift every character up by 31 code
# points ('B' (66) -> 'a' (97)).
595 def test_Seq_convert_scores_bang_from_sanger_to_illumina15_returns_OK
596 @entry.qual = 'BCDEFGHI'
597 assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina15').qual)
# Converting sanger -> illumina18 should leave the quality string unchanged.
600 def test_Seq_convert_scores_bang_from_sanger_to_illumina18_returns_OK
601 @entry.qual = 'BCDEFGHI'
602 assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'illumina18').qual)
605 # convert solexa to ...
# Converting solexa -> sanger should shift every character down by 31 code
# points ('B' (66) -> '#' (35)).
607 def test_Seq_convert_scores_bang_from_solexa_to_sanger_returns_OK
608 @entry.qual = 'BCDEFGHI'
609 assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'sanger').qual)
# Converting solexa -> solexa should leave the quality string unchanged.
612 def test_Seq_convert_scores_bang_from_solexa_to_solexa_returns_OK
613 @entry.qual = 'BCDEFGHI'
614 assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'solexa').qual)
# Converting solexa -> illumina13 is expected to leave this quality string unchanged.
617 def test_Seq_convert_scores_bang_from_solexa_to_illumina13_returns_OK
618 @entry.qual = 'BCDEFGHI'
619 assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina13').qual)
# Converting solexa -> illumina15 is expected to leave this quality string unchanged.
622 def test_Seq_convert_scores_bang_from_solexa_to_illumina15_returns_OK
623 @entry.qual = 'BCDEFGHI'
624 assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina15').qual)
# Converting solexa -> illumina18 should shift every character down by 31 code
# points ('B' (66) -> '#' (35)).
627 def test_Seq_convert_scores_bang_from_solexa_to_illumina18_returns_OK
628 @entry.qual = 'BCDEFGHI'
629 assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'illumina18').qual)
632 # convert illumina13 to ...
# Converting illumina13 -> sanger should shift every character down by 31 code
# points ('B' (66) -> '#' (35)).
634 def test_Seq_convert_scores_bang_from_illumina13_to_sanger_returns_OK
635 @entry.qual = 'BCDEFGHI'
636 assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'sanger').qual)
# Converting illumina13 -> solexa is expected to leave this quality string unchanged.
639 def test_Seq_convert_scores_bang_from_illumina13_to_solexa_returns_OK
640 @entry.qual = 'BCDEFGHI'
641 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'solexa').qual)
# Converting illumina13 -> illumina13 should leave the quality string unchanged.
644 def test_Seq_convert_scores_bang_from_illumina13_to_illumina13_returns_OK
645 @entry.qual = 'BCDEFGHI'
646 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina13').qual)
# Converting illumina13 -> illumina15 is expected to leave this quality string unchanged.
649 def test_Seq_convert_scores_bang_from_illumina13_to_illumina15_returns_OK
650 @entry.qual = 'BCDEFGHI'
651 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina15').qual)
# Converting illumina13 -> illumina18 should shift every character down by 31
# code points ('B' (66) -> '#' (35)).
654 def test_Seq_convert_scores_bang_from_illumina13_to_illumina18_returns_OK
655 @entry.qual = 'BCDEFGHI'
656 assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'illumina18').qual)
659 # convert illumina15 to ...
# Converting illumina15 -> sanger should shift every character down by 31 code
# points ('B' (66) -> '#' (35)).
661 def test_Seq_convert_scores_bang_from_illumina15_to_sanger_returns_OK
662 @entry.qual = 'BCDEFGHI'
663 assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'sanger').qual)
# Converting illumina15 -> solexa is expected to leave this quality string unchanged.
666 def test_Seq_convert_scores_bang_from_illumina15_to_solexa_returns_OK
667 @entry.qual = 'BCDEFGHI'
668 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'solexa').qual)
# Converting illumina15 -> illumina13 is expected to leave this quality string unchanged.
671 def test_Seq_convert_scores_bang_from_illumina15_to_illumina13_returns_OK
672 @entry.qual = 'BCDEFGHI'
673 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina13').qual)
# Converting illumina15 -> illumina15 should leave the quality string unchanged.
676 def test_Seq_convert_scores_bang_from_illumina15_to_illumina15_returns_OK
677 @entry.qual = 'BCDEFGHI'
678 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina15').qual)
# Converting illumina15 -> illumina18 should shift every character down by 31
# code points ('B' (66) -> '#' (35)).
681 def test_Seq_convert_scores_bang_from_illumina15_to_illumina18_returns_OK
682 @entry.qual = 'BCDEFGHI'
683 assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'illumina18').qual)
686 # convert illumina18 to ...
# Converting illumina18 -> sanger should leave the quality string unchanged.
688 def test_Seq_convert_scores_bang_from_illumina18_to_sanger_returns_OK
689 @entry.qual = 'BCDEFGHI'
690 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'sanger').qual)
# Converting illumina18 -> solexa should shift every character up by 31 code
# points ('B' (66) -> 'a' (97)).
693 def test_Seq_convert_scores_bang_from_illumina18_to_solexa_returns_OK
694 @entry.qual = 'BCDEFGHI'
695 assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'solexa').qual)
# Converting illumina18 -> illumina13 should shift every character up by 31
# code points ('B' (66) -> 'a' (97)).
698 def test_Seq_convert_scores_bang_from_illumina18_to_illumina13_returns_OK
699 @entry.qual = 'BCDEFGHI'
700 assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina13').qual)
# Converting illumina18 -> illumina15 should shift every character up by 31
# code points ('B' (66) -> 'a' (97)).
703 def test_Seq_convert_scores_bang_from_illumina18_to_illumina15_returns_OK
704 @entry.qual = 'BCDEFGHI'
705 assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina15').qual)
# Converting illumina18 -> illumina18 should leave the quality string unchanged.
708 def test_Seq_convert_scores_bang_from_illumina18_to_illumina18_returns_OK
709 @entry.qual = 'BCDEFGHI'
710 assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'illumina18').qual)
# scores_mean must raise SeqError when the entry has no quality string.
# NOTE(review): one setup line is not visible here (numbering skips);
# presumably it sets qual to nil - confirm.
713 def test_Seq_scores_mean_without_qual_raises
715 assert_raise(SeqError) { @entry.scores_mean }
# scores_mean is expected to return 20.0.
# NOTE(review): the line assigning the quality string is not visible
# (numbering skips) - confirm which scores average to 20.
718 def test_Seq_scores_mean_returns_correctly
720 assert_equal(20.0, @entry.scores_mean)