7 class TestSeq < Test::Unit::TestCase
12 # # autoremoves whitespace, newlines, and carriage returns
15 # dna.seq = "A\tT\r\tC\nG "
16 # assert_equal(dna.seq, "ATCG")
# Seq.new_bp is expected to map a record hash onto the Seq fields:
# :SEQ_NAME -> seq_name, :SEQ -> seq, :SEQ_TYPE -> type, :SCORES -> qual.
def test_Seq_new_bp_returns_correctly
  bp_record = {
    :SEQ_NAME => "test",
    :SEQ      => "ATCG",
    :SEQ_TYPE => "dna",
    :SCORES   => "hhhh"
  }
  built = Seq.new_bp(bp_record)

  assert_equal("test", built.seq_name)
  assert_equal("ATCG", built.seq)
  assert_equal("dna",  built.type)
  assert_equal("hhhh", built.qual)
end
# @entry.is_dna? must be exactly false in this case (per the test name, no
# sequence type has been assigned; the fixture setup is outside this view).
# assert_equal is used instead of assert(x == false) so that a failure
# reports the actual value, matching the assert_equal(true, ...) style of
# the protein test in this file.
def test_Seq_is_dna_with_no_sequence_type_returns_false
  assert_equal(false, @entry.is_dna?)
end
# Asserts that @entry.is_dna? is true.
# NOTE(review): the step that gives @entry a DNA type is not visible in this view — confirm it precedes the assertion.
32 def test_Seq_is_dna_with_dna_sequence_type_returns_true
34 assert(@entry.is_dna? == true)
# @entry.is_rna? must be exactly false in this case (per the test name, no
# sequence type has been assigned; the fixture setup is outside this view).
# assert_equal is used instead of assert(x == false) so that a failure
# reports the actual value, matching the assert_equal(true, ...) style of
# the protein test in this file.
def test_Seq_is_rna_with_no_sequence_type_returns_false
  assert_equal(false, @entry.is_rna?)
end
# Asserts that @entry.is_rna? is true.
# NOTE(review): the step that gives @entry an RNA type is not visible in this view — confirm it precedes the assertion.
41 def test_Seq_is_rna_with_rna_sequence_type_returns_true
43 assert(@entry.is_rna? == true)
# @entry.is_protein? must be exactly false in this case (per the test name,
# no sequence type has been assigned; the fixture setup is outside this view).
# assert_equal is used instead of assert(x == false) so that a failure
# reports the actual value, matching the assert_equal(true, ...) style of
# the neighbouring protein test.
def test_Seq_is_protein_with_no_sequence_type_returns_false
  assert_equal(false, @entry.is_protein?)
end
# After the type is set to 'protein', is_protein? must answer true.
def test_Seq_is_protein_with_protein_sequence_type_returns_true
  @entry.type = 'protein'
  answer = @entry.is_protein?
  assert_equal(true, answer)
end
# type_guess is expected to raise SeqError; this test assigns no sequence.
def test_Seq_type_guess_without_sequence_raises
  assert_raise(SeqError) do
    @entry.type_guess
  end
end
# The sequence 'atcatcrFgatcg' must be guessed as 'protein'.
def test_Seq_type_guess_with_protein_returns_protein
  @entry.seq = 'atcatcrFgatcg'
  guessed = @entry.type_guess
  assert_equal('protein', guessed)
end
# The sequence 'atcatcrUgatcg' must be guessed as 'rna'.
def test_Seq_type_guess_with_rna_returns_rna
  @entry.seq = 'atcatcrUgatcg'
  guessed = @entry.type_guess
  assert_equal('rna', guessed)
end
# The sequence 'atcatcgatcg' must be guessed as 'dna'.
def test_Seq_type_guess_with_dna_returns_dna
  @entry.seq = 'atcatcgatcg'
  guessed = @entry.type_guess
  assert_equal('dna', guessed)
end
# type_guess! is expected to raise SeqError; this test assigns no sequence.
def test_Seq_type_guess_EM_without_sequence_raises
  assert_raise(SeqError) do
    @entry.type_guess!
  end
end
# Assigns 'atcatcrFgatcg' as the sequence and asserts that @entry.type is 'protein' afterwards.
# NOTE(review): the call that sets the type (presumably type_guess!, per the test name) is not visible here — confirm.
78 def test_Seq_type_guess_EM_with_protein_returns_protein
79 @entry.seq = 'atcatcrFgatcg'
81 assert_equal('protein', @entry.type)
# Assigns 'atcatcrUgatcg' as the sequence and asserts that @entry.type is 'rna' afterwards.
# NOTE(review): the call that sets the type (presumably type_guess!, per the test name) is not visible here — confirm.
84 def test_Seq_type_guess_EM_with_rna_returns_rna
85 @entry.seq = 'atcatcrUgatcg'
87 assert_equal('rna', @entry.type)
# Assigns 'atcatcgatcg' as the sequence and asserts that @entry.type is 'dna' afterwards.
# NOTE(review): the call that sets the type (presumably type_guess!, per the test name) is not visible here — confirm.
90 def test_Seq_type_guess_EM_with_dna_returns_dna
91 @entry.seq = 'atcatcgatcg'
93 assert_equal('dna', @entry.type)
# Asserts that @entry.length is 4.
# NOTE(review): the sequence assignment this depends on is not visible here — confirm.
96 def test_Seq_length_is_correct
98 assert_equal(4, @entry.length)
# For the sequence 'ATCG.-~_' the indels count is expected to be 4.
def test_Seq_indels_is_correct
  @entry.seq = 'ATCG.-~_'
  indel_count = @entry.indels
  assert_equal(4, indel_count)
end
# Asserts that to_rna raises SeqError.
# NOTE(review): setup preceding the assertion is not visible here; per the test name no sequence should be assigned — confirm.
106 def test_Seq_to_rna_raises_if_no_sequence
108 assert_raise(SeqError) { @entry.to_rna }
# Asserts that to_rna raises SeqError.
# NOTE(review): the setup that produces the "bad type" named in the test is not visible here — confirm.
111 def test_Seq_to_rna_raises_on_bad_type
114 assert_raise(SeqError) { @entry.to_rna }
# Assigns 'ATCGatcg' and asserts that to_rna returns "AUCGaucg" (case preserved).
# NOTE(review): a statement between the assignment and the assertion is not visible here (presumably the type assignment) — confirm.
117 def test_Seq_to_rna_transcribes_correctly
118 @entry.seq = 'ATCGatcg'
120 assert_equal("AUCGaucg", @entry.to_rna)
# Assigns 'ATCGatcg' and asserts that @entry.type is "rna" afterwards.
# NOTE(review): the statements between the assignment and the assertion (presumably including the to_rna call) are not visible here — confirm.
123 def test_Seq_to_rna_changes_entry_type_to_rna
124 @entry.seq = 'ATCGatcg'
127 assert_equal("rna", @entry.type)
# Asserts that to_dna raises SeqError.
# NOTE(review): setup preceding the assertion is not visible here; per the test name no sequence should be assigned — confirm.
130 def test_Seq_to_dna_raises_if_no_sequence
132 assert_raise(SeqError) { @entry.to_dna }
# Asserts that to_dna raises SeqError.
# NOTE(review): the setup that produces the "bad type" named in the test is not visible here — confirm.
135 def test_Seq_to_dna_raises_on_bad_type
138 assert_raise(SeqError) { @entry.to_dna }
# Assigns 'AUCGaucg' and asserts that to_dna returns "ATCGatcg" (case preserved).
# NOTE(review): a statement between the assignment and the assertion is not visible here (presumably the type assignment) — confirm.
141 def test_Seq_to_dna_transcribes_correctly
142 @entry.seq = 'AUCGaucg'
144 assert_equal("ATCGatcg", @entry.to_dna)
# Assigns 'AUCGaucg' and asserts that @entry.type is "dna" afterwards.
# NOTE(review): the statements between the assignment and the assertion (presumably including the to_dna call) are not visible here — confirm.
147 def test_Seq_to_dna_changes_entry_type_to_dna
148 @entry.seq = 'AUCGaucg'
151 assert_equal("dna", @entry.type)
# Sets seq_name to 'test' and asserts that to_bp returns a hash with :SEQ_NAME, :SEQ and :SEQ_LEN (4).
# NOTE(review): the assignment of the "ATCG" sequence is not visible here — confirm.
154 def test_Seq_to_bp_returns_correct_record
155 @entry.seq_name = 'test'
157 assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
# Asserts that to_bp raises SeqError.
# NOTE(review): setup preceding the assertion is not visible here; per the test name seq_name should be left unset — confirm.
160 def test_Seq_to_bp_raises_on_missing_seq_name
162 assert_raise(SeqError) { @entry.to_bp }
# Only seq_name is assigned in this test; to_bp is then expected to raise SeqError.
def test_Seq_to_bp_raises_on_missing_sequence
  @entry.seq_name = 'test'
  assert_raise(SeqError) do
    @entry.to_bp
  end
end
# Asserts that to_fasta raises SeqError.
# NOTE(review): setup preceding the assertion is not visible here; per the test name seq_name should be left unset — confirm.
170 def test_Seq_to_fasta_raises_on_missing_seq_name
172 assert_raise(SeqError) { @entry.to_fasta }
# Asserts that to_fasta raises SeqError.
# NOTE(review): the setup that assigns the empty seq_name named in the test is not visible here — confirm.
175 def test_Seq_to_fasta_raises_on_empty_seq_name
178 assert_raise(SeqError) { @entry.to_fasta }
# Only seq_name is assigned in this test; to_fasta is then expected to raise SeqError.
def test_Seq_to_fasta_raises_on_missing_seq
  @entry.seq_name = 'test'
  assert_raise(SeqError) do
    @entry.to_fasta
  end
end
# Sets seq_name to 'test' and asserts that to_fasta raises SeqError.
# NOTE(review): the statement assigning the empty sequence named in the test is not visible here — confirm.
186 def test_Seq_to_fasta_raises_on_empty_seq
187 @entry.seq_name = 'test'
189 assert_raise(SeqError) { @entry.to_fasta }
# Sets seq_name to 'test' and asserts that to_fasta returns ">test\nATCG\n".
# NOTE(review): the assignment of the "ATCG" sequence is not visible here — confirm.
192 def test_Seq_to_fasta_returns_correct_entry
193 @entry.seq_name = 'test'
195 assert_equal(">test\nATCG\n", @entry.to_fasta)
# to_fasta(2) on a fresh Seq("test", "ATCG") must emit the sequence as
# two lines of two residues each.
def test_Seq_to_fasta_wraps_correctly
  wrapped = Seq.new("test", "ATCG").to_fasta(2)
  assert_equal(">test\nAT\nCG\n", wrapped)
end
# Sets seq_name to 'test' and asserts that to_fastq returns "@test\nATCG\n+\nhhhh\n".
# NOTE(review): the sequence and quality assignments are not visible here — confirm.
203 def test_Seq_to_fastq_returns_correct_entry
204 @entry.seq_name = 'test'
207 assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
# to_key on the sequence "AUCG" is expected to raise SeqError.
def test_Seq_to_key_with_bad_residue_raises
  offending = Seq.new("test", "AUCG")
  assert_raise(SeqError) do
    offending.to_key
  end
end
# to_key on the sequence "ATCG" is expected to return 54.
def test_Seq_to_key_returns_correctly
  key = Seq.new("test", "ATCG").to_key
  assert_equal(54, key)
end
# Asserts that @entry.reverse.seq is "GCTA".
# NOTE(review): the sequence assignment this depends on is not visible here — confirm.
220 def test_Seq_reverse_returns_correctly
222 assert_equal("GCTA", @entry.reverse.seq)
# Asserts that complement raises SeqError.
# NOTE(review): setup preceding the assertion is not visible here; per the test name no sequence should be assigned — confirm.
225 def test_Seq_complement_raises_if_no_sequence
227 assert_raise(SeqError) { @entry.complement }
# Sets the type to 'protein' and asserts that complement raises SeqError.
# NOTE(review): a statement before the type assignment is not visible here (presumably a sequence assignment) — confirm.
230 def test_Seq_complement_raises_on_bad_type
232 @entry.type = 'protein'
233 assert_raise(SeqError) { @entry.complement }
# Assigns 'ATCGatcg' and asserts that complement returns "TAGCtagc" (case preserved).
# NOTE(review): a statement between the assignment and the assertion is not visible here (presumably the type assignment) — confirm.
236 def test_Seq_complement_for_DNA_is_correct
237 @entry.seq = 'ATCGatcg'
239 assert_equal("TAGCtagc", @entry.complement)
# Assigns 'AUCGaucg' and asserts that complement returns "UAGCuagc" (case preserved).
# NOTE(review): a statement between the assignment and the assertion is not visible here (presumably the type assignment) — confirm.
242 def test_Seq_complement_for_RNA_is_correct
243 @entry.seq = 'AUCGaucg'
245 assert_equal("UAGCuagc", @entry.complement)
# Assigns 'ATCGatcg' and asserts that reverse_complement.seq is "cgatCGAT".
# NOTE(review): a statement between the assignment and the assertion is not visible here (presumably the type assignment) — confirm.
248 def test_Seq_reverse_complement_for_DNA_is_correct
249 @entry.seq = 'ATCGatcg'
251 assert_equal("cgatCGAT", @entry.reverse_complement.seq)
# Assigns 'AUCGaucg' and asserts that reverse_complement.seq is "cgauCGAU".
# NOTE(review): a statement between the assignment and the assertion is not visible here (presumably the type assignment) — confirm.
254 def test_Seq_reverse_complement_for_RNA_is_correct
255 @entry.seq = 'AUCGaucg'
257 assert_equal("cgauCGAU", @entry.reverse_complement.seq)
# The Hamming distance between "ATCG" and "atgg" is expected to be 1.
def test_Seq_hamming_distance_returns_correctly
  left     = Seq.new("test1", "ATCG")
  right    = Seq.new("test2", "atgg")
  distance = left.hamming_distance(right)
  assert_equal(1, distance)
end
# generate must raise SeqError for the lengths -10 and 0.
def test_Seq_generate_with_length_lt_1_raises
  [-10, 0].each do |bad_length|
    assert_raise(SeqError) { @entry.generate(bad_length, "dna") }
  end
end
# generate must raise SeqError when asked for the type "foo".
def test_Seq_generate_with_bad_type_raises
  assert_raise(SeqError) do
    @entry.generate(10, "foo")
  end
end
# generate must not raise for any of the listed type spellings.
def test_Seq_generate_with_ok_type_dont_raise
  accepted = %w[dna DNA rna RNA protein Protein]
  accepted.each { |type_name| assert_nothing_raised { @entry.generate(10, type_name) } }
end
# shuffle must leave the receiver's sequence equal to the original and
# return an entry whose sequence differs from it.
def test_Seq_shuffle_returns_correctly
  orig = "actgactgactgatcgatcgatcgatcgtactg"
  # Hand the entry its own copy so `orig` stays an independent reference value.
  @entry.seq = orig.dup
  shuffled = @entry.shuffle
  assert_equal(orig, @entry.seq)
  assert_not_equal(@entry.seq, shuffled.seq)
end
# shuffle! must change the entry's sequence.
# The pre-shuffle sequence is copied first: comparing against @entry.seq
# directly would compare the String object with itself if shuffle! mutates
# that object in place, regardless of whether anything was shuffled.
def test_Seq_shuffle_bang_returns_correctly
  @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
  before = @entry.seq.dup
  assert_not_equal(before, @entry.shuffle!.seq)
end
# Asserts that subseq(-1, 1) raises SeqError.
# NOTE(review): the sequence assignment preceding the assertion is not visible here — confirm.
294 def test_Seq_subseq_with_start_lt_0_raises
296 assert_raise(SeqError) { @entry.subseq(-1, 1) }
# Asserts that subseq(0, 5) raises SeqError.
# NOTE(review): the sequence assignment preceding the assertion is not visible here (presumably shorter than 5 residues) — confirm.
299 def test_Seq_subseq_with_start_plus_length_gt_seq_raises
301 assert_raise(SeqError) { @entry.subseq(0, 5) }
# Asserts that subseq(0, 2).seq is "AT" and subseq(2, 2).seq is "CG".
# NOTE(review): the sequence assignment this depends on is not visible here — confirm.
304 def test_Seq_subseq_returns_correct_sequence
306 assert_equal("AT", @entry.subseq(0, 2).seq)
307 assert_equal("CG", @entry.subseq(2, 2).seq)
# Asserts that subseq(0).seq is "ATCG" and subseq(2).seq is "CG" when no length is passed.
# NOTE(review): the sequence assignment this depends on is not visible here — confirm.
310 def test_Seq_subseq_without_len_returns_correct_sequence
312 assert_equal("ATCG", @entry.subseq(0).seq)
313 assert_equal("CG", @entry.subseq(2).seq)
# Asserts that subseq(0, 2).qual is "ab" and subseq(2, 2).qual is "cd".
# NOTE(review): the sequence and quality assignments this depends on are not visible here — confirm.
316 def test_Seq_subseq_returns_correct_qual
319 assert_equal("ab", @entry.subseq(0, 2).qual)
320 assert_equal("cd", @entry.subseq(2, 2).qual)
# Asserts that subseq(0).qual is "abcd" and subseq(2).qual is "cd" when no length is passed.
# NOTE(review): the sequence and quality assignments this depends on are not visible here — confirm.
323 def test_Seq_subseq_without_len_returns_correct_qual
326 assert_equal("abcd", @entry.subseq(0).qual)
327 assert_equal("cd", @entry.subseq(2).qual)
# Asserts that subseq!(-1, 1) raises SeqError.
# NOTE(review): the sequence assignment preceding the assertion is not visible here — confirm.
330 def test_Seq_subseq_bang_with_start_lt_0_raises
332 assert_raise(SeqError) { @entry.subseq!(-1, 1) }
# Asserts that subseq!(0, 5) raises SeqError.
# NOTE(review): the sequence assignment preceding the assertion is not visible here (presumably shorter than 5 residues) — confirm.
335 def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
337 assert_raise(SeqError) { @entry.subseq!(0, 5) }
# Asserts that @entry.seq is "AT" at one point and "CG" at a later point.
# NOTE(review): the sequence assignments and the subseq! calls that precede each assertion are not visible here — confirm.
340 def test_Seq_subseq_bang_returns_correct_sequence
343 assert_equal("AT", @entry.seq)
346 assert_equal("CG", @entry.seq)
# Asserts that @entry.seq is "ATCG" at one point and "CG" at a later point.
# NOTE(review): the sequence assignments and the subseq! calls that precede each assertion are not visible here — confirm.
349 def test_Seq_subseq_bang_without_len_returns_correct_sequence
352 assert_equal("ATCG", @entry.seq)
355 assert_equal("CG", @entry.seq)
# Asserts that @entry.qual is "ab" at one point and "cd" at a later point.
# NOTE(review): the fixture assignments and the subseq! calls that precede each assertion are not visible here — confirm.
358 def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
362 assert_equal("ab", @entry.qual)
366 assert_equal("cd", @entry.qual)
# Asserts that @entry.qual is "abcd" at one point and "cd" at a later point.
# NOTE(review): the fixture assignments and the subseq! calls that precede each assertion are not visible here — confirm.
369 def test_Seq_subseq_bang_with_pos_returns_correct_qual
373 assert_equal("abcd", @entry.qual)
377 assert_equal("cd", @entry.qual)
# Asserts that subseq_rand(4).seq is "ATCG".
# NOTE(review): the sequence assignment is not visible here; presumably it is exactly four residues long so only one subsequence is possible — confirm.
380 def test_Seq_subseq_rand_returns_correct_sequence
382 assert_equal("ATCG", @entry.subseq_rand(4).seq)
# Assigns "A-T.CG~CG" and asserts that indels_remove.seq is "ATCGCG".
# NOTE(review): a statement between the assignment and the assertion is not visible here — confirm what it sets.
385 def test_Seq_indels_remove_without_qual_returns_correctly
386 @entry.seq = "A-T.CG~CG"
388 assert_equal("ATCGCG", @entry.indels_remove.seq)
# With sequence "A-T.CG~CG" and quality "a@b@cd@fg", indels_remove is
# expected to give sequence "ATCGCG" and quality "abcdfg".
# indels_remove is invoked once per assertion, as in the original.
def test_Seq_indels_remove_with_qual_returns_correctly
  @entry.seq  = "A-T.CG~CG"
  @entry.qual = "a@b@cd@fg"

  cleaned_seq = @entry.indels_remove.seq
  assert_equal("ATCGCG", cleaned_seq)

  cleaned_qual = @entry.indels_remove.qual
  assert_equal("abcdfg", cleaned_qual)
end
# For "AAAATTTCCG" the composition lookup must give A=4, T=3, C=2, G=1
# and 0 for the absent residue "X".
def test_Seq_composition_returns_correctly
  @entry.seq = "AAAATTTCCG"
  [["A", 4], ["T", 3], ["C", 2], ["G", 1], ["X", 0]].each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end
# Asserts that homopol_max (called without an argument) returns 0.
# NOTE(review): the assignment of the empty sequence named in the test is not visible here — confirm.
407 def test_Seq_homopol_max_returns_0_with_empty_sequence
409 assert_equal(0, @entry.homopol_max)
# Asserts that homopol_max (called without an argument) returns 0.
# NOTE(review): the assignment of the nil sequence named in the test is not visible here — confirm.
412 def test_Seq_homopol_max_returns_0_with_nil_sequence
414 assert_equal(0, @entry.homopol_max)
# For "AtTcCcGggGnnNnn", homopol_max(6) is expected to return 0.
def test_Seq_homopol_max_returns_0_when_not_found
  @entry.seq = "AtTcCcGggGnnNnn"
  longest = @entry.homopol_max(6)
  assert_equal(0, longest)
end
# For "AtTcCcGggGnnNnn", homopol_max(3) is expected to return 5.
def test_Seq_homopol_max_returns_correctly
  @entry.seq = "AtTcCcGggGnnNnn"
  longest = @entry.homopol_max(3)
  assert_equal(5, longest)
end
# For "--AAAANn", hard_mask is expected to return exactly 33.33.
def test_Seq_hard_mask_returns_correctly
  @entry.seq = "--AAAANn"
  masked = @entry.hard_mask
  assert_equal(33.33, masked)
end
# For "--AAAa", soft_mask is expected to return exactly 25.00.
def test_Seq_soft_mask_returns_correctly
  @entry.seq = "--AAAa"
  masked = @entry.soft_mask
  assert_equal(25.00, masked)
end
# Asserts that mask_seq_hard!(20) raises SeqError.
# NOTE(review): the fixture statements that leave the sequence nil are not visible here — confirm.
437 def test_Seq_mask_seq_hard_bang_with_nil_seq_raises
441 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# Asserts that mask_seq_hard!(20) raises SeqError.
# NOTE(review): the fixture statements that leave the quality string nil are not visible here — confirm.
444 def test_Seq_mask_seq_hard_bang_with_nil_qual_raises
448 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# mask_seq_hard! must raise SeqError for the cutoffs -1 and 41.
def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises
  [-1, 41].each do |cutoff|
    assert_raise(SeqError) { @entry.mask_seq_hard!(cutoff) }
  end
end
# Asserts that mask_seq_hard! raises nothing for the cutoffs 0 and 40.
# NOTE(review): the sequence and quality assignments preceding the assertions are not visible here — confirm.
456 def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise
460 assert_nothing_raised { @entry.mask_seq_hard!(0) }
461 assert_nothing_raised { @entry.mask_seq_hard!(40) }
# Sets the quality string to "RRSTU" and asserts that mask_seq_hard!(20).seq is "-NNCG".
# NOTE(review): the sequence assignment (and one further statement after the quality assignment) is not visible here — confirm.
464 def test_Seq_mask_seq_hard_bang_returns_correctly
466 @entry.qual = "RRSTU"
468 assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
# Asserts that mask_seq_soft!(20) raises SeqError.
# NOTE(review): the fixture statements that leave the sequence nil are not visible here — confirm.
471 def test_Seq_mask_seq_soft_bang_with_nil_seq_raises
475 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# Asserts that mask_seq_soft!(20) raises SeqError.
# NOTE(review): the fixture statements that leave the quality string nil are not visible here — confirm.
478 def test_Seq_mask_seq_soft_bang_with_nil_qual_raises
482 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# mask_seq_soft! must raise SeqError for the cutoffs -1 and 41.
def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises
  [-1, 41].each do |cutoff|
    assert_raise(SeqError) { @entry.mask_seq_soft!(cutoff) }
  end
end
# Asserts that mask_seq_soft! raises nothing for the cutoffs 0 and 40.
# NOTE(review): the sequence and quality assignments preceding the assertions are not visible here — confirm.
490 def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise
494 assert_nothing_raised { @entry.mask_seq_soft!(0) }
495 assert_nothing_raised { @entry.mask_seq_soft!(40) }
# Sets the quality string to "RRSTU" and asserts that mask_seq_soft!(20).seq is "-atCG".
# NOTE(review): the sequence assignment (and one further statement after the quality assignment) is not visible here — confirm.
498 def test_Seq_mask_seq_soft_bang_returns_correctly
500 @entry.qual = "RRSTU"
502 assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
505 # qual score detection
# Assigns a quality string made of every character from '!' through ':' and asserts
# that qual_base33? is true; two further assertions expect false.
# NOTE(review): the quality assignments that precede each `false` assertion are not visible here — confirm their values.
507 def test_Seq_qual_base33_returns_correctly
508 # self.qual.match(/[!-:]/)
509 @entry.qual = '!"#$%&\'()*+,-./0123456789:'
510 assert_equal(true, @entry.qual_base33? )
512 assert_equal(false, @entry.qual_base33? )
514 assert_equal(false, @entry.qual_base33? )
# Assigns a quality string made of every character from 'K' through 'h' and asserts
# that qual_base64? is true; two further assertions expect false, the last one for
# 105.chr ('i', the character directly after 'h').
# NOTE(review): the quality assignment that precedes the first `false` assertion is not visible here — confirm its value.
517 def test_Seq_qual_base64_returns_correctly
518 # self.qual.match(/[K-h]/)
519 @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
520 assert_equal(true, @entry.qual_base64? )
522 assert_equal(false, @entry.qual_base64? )
523 @entry.qual = 105.chr
524 assert_equal(false, @entry.qual_base64? )
# qual_valid?("illumina1.8") is expected to raise SeqError; this test
# assigns no quality string.
def test_Seq_qual_valid_with_nil_qual_raises
  assert_raise(SeqError) do
    @entry.qual_valid?("illumina1.8")
  end
end
# Asserts that qual_valid?("foobar") raises SeqError.
# NOTE(review): the statement preceding the assertion is not visible here (presumably a quality assignment) — confirm.
531 def test_Seq_qual_valid_with_bad_encoding_raises
533 assert_raise(SeqError) { @entry.qual_valid?("foobar") }
# Table-driven check of qual_valid?. Each row is [encoding name, a, b, c]; the body
# builds quality strings from the characters (a + c).chr and (b + c).chr and expects
# true, then expects false for (a + c - 1).chr and for (b + c + 1).chr.
# Presumably a/b are the lowest/highest valid scores and c is the ASCII offset — TODO confirm.
# NOTE(review): one table row and the loop header that binds `test` are not visible here — confirm.
536 def test_Seq_qual_valid_returns_correctly
537 tests = [["sanger", 0, 93, 33],
539 ["solexa", -5, 62, 64],
540 ["illumina13", 0, 62, 64],
541 ["illumina15", 0, 62, 64],
542 ["illumina18", 0, 93, 33]]
545 @entry.qual = (test[1] + test[-1]).chr + (test[2] + test[-1]).chr
546 assert_equal(true, @entry.qual_valid?(test[0]))
547 @entry.qual = (test[1] + test[-1] - 1).chr
548 assert_equal(false, @entry.qual_valid?(test[0]))
549 @entry.qual = (test[2] + test[-1] + 1).chr
550 assert_equal(false, @entry.qual_valid?(test[0]))
554 # convert sanger to ...
# sanger -> sanger: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('sanger', 'sanger')
  assert_equal('BCDEFGHI', converted.qual)
end
# sanger -> solexa: 'BCDEFGHI' is expected to become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_sanger_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('sanger', 'solexa')
  assert_equal('abcdefgh', converted.qual)
end
# sanger -> illumina13: 'BCDEFGHI' is expected to become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_sanger_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('sanger', 'illumina13')
  assert_equal('abcdefgh', converted.qual)
end
# sanger -> illumina15: 'BCDEFGHI' is expected to become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_sanger_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('sanger', 'illumina15')
  assert_equal('abcdefgh', converted.qual)
end
# sanger -> illumina18: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_sanger_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('sanger', 'illumina18')
  assert_equal('BCDEFGHI', converted.qual)
end
581 # convert solexa to ...
# solexa -> sanger: 'BCDEFGHI' is expected to become the eight characters
# from '#' through '*'.
def test_Seq_convert_scores_bang_from_solexa_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('solexa', 'sanger')
  assert_equal(%q[#$%&'()*], converted.qual)
end
# solexa -> solexa: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_solexa_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('solexa', 'solexa')
  assert_equal('BCDEFGHI', converted.qual)
end
# solexa -> illumina13: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_solexa_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('solexa', 'illumina13')
  assert_equal('BCDEFGHI', converted.qual)
end
# solexa -> illumina15: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_solexa_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('solexa', 'illumina15')
  assert_equal('BCDEFGHI', converted.qual)
end
# solexa -> illumina18: 'BCDEFGHI' is expected to become the eight
# characters from '#' through '*'.
def test_Seq_convert_scores_bang_from_solexa_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('solexa', 'illumina18')
  assert_equal(%q[#$%&'()*], converted.qual)
end
608 # convert illumina13 to ...
# illumina13 -> sanger: 'BCDEFGHI' is expected to become the eight
# characters from '#' through '*'.
def test_Seq_convert_scores_bang_from_illumina13_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina13', 'sanger')
  assert_equal(%q[#$%&'()*], converted.qual)
end
# illumina13 -> solexa: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina13_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina13', 'solexa')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina13 -> illumina13: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina13_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina13', 'illumina13')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina13 -> illumina15: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina13_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina13', 'illumina15')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina13 -> illumina18: 'BCDEFGHI' is expected to become the eight
# characters from '#' through '*'.
def test_Seq_convert_scores_bang_from_illumina13_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina13', 'illumina18')
  assert_equal(%q[#$%&'()*], converted.qual)
end
635 # convert illumina15 to ...
# illumina15 -> sanger: 'BCDEFGHI' is expected to become the eight
# characters from '#' through '*'.
def test_Seq_convert_scores_bang_from_illumina15_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina15', 'sanger')
  assert_equal(%q[#$%&'()*], converted.qual)
end
# illumina15 -> solexa: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina15_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina15', 'solexa')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina15 -> illumina13: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina15_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina15', 'illumina13')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina15 -> illumina15: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina15_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina15', 'illumina15')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina15 -> illumina18: 'BCDEFGHI' is expected to become the eight
# characters from '#' through '*'.
def test_Seq_convert_scores_bang_from_illumina15_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina15', 'illumina18')
  assert_equal(%q[#$%&'()*], converted.qual)
end
662 # convert illumina18 to ...
# illumina18 -> sanger: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina18_to_sanger_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina18', 'sanger')
  assert_equal('BCDEFGHI', converted.qual)
end
# illumina18 -> solexa: 'BCDEFGHI' is expected to become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_illumina18_to_solexa_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina18', 'solexa')
  assert_equal('abcdefgh', converted.qual)
end
# illumina18 -> illumina13: 'BCDEFGHI' is expected to become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_illumina18_to_illumina13_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina18', 'illumina13')
  assert_equal('abcdefgh', converted.qual)
end
# illumina18 -> illumina15: 'BCDEFGHI' is expected to become 'abcdefgh'.
def test_Seq_convert_scores_bang_from_illumina18_to_illumina15_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina18', 'illumina15')
  assert_equal('abcdefgh', converted.qual)
end
# illumina18 -> illumina18: 'BCDEFGHI' is expected to come back unchanged.
def test_Seq_convert_scores_bang_from_illumina18_to_illumina18_returns_OK
  @entry.qual = 'BCDEFGHI'
  converted = @entry.convert_scores!('illumina18', 'illumina18')
  assert_equal('BCDEFGHI', converted.qual)
end
# Asserts that scores_mean raises SeqError.
# NOTE(review): the statement preceding the assertion is not visible here; per the test name the quality string should be unset — confirm.
689 def test_Seq_scores_mean_without_qual_raises
691 assert_raise(SeqError) { @entry.scores_mean }
694 def test_Seq_scores_mean_returns_correctly
696 assert_equal(20.0, @entry.scores_mean)