2 $:.unshift File.join(File.dirname(__FILE__), '..', '..')
4 # Copyright (C) 2011 Martin A. Hansen.
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # This software is part of the Biopieces framework (www.biopieces.org).
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 class TestSeq < Test::Unit::TestCase
# Builds a Seq from a record hash via Seq.new_bp and checks that the name,
# sequence, type and quality string are exposed through the matching accessors.
test "Seq.new_bp returns correctly" do
  seq = Seq.new_bp({:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => :dna, :SCORES => "hhhh"})

  assert_equal("test", seq.seq_name)
  assert_equal("ATCG", seq.seq)
  assert_equal(:dna, seq.type)
  assert_equal("hhhh", seq.qual)
end
# Expects #is_dna? to be false; per the test name @entry has no type assigned
# (how @entry is created is not visible in this excerpt).
# assert_equal is used so a failure reports the value actually returned.
test "#is_dna? with no sequence type returns false" do
  assert_equal(false, @entry.is_dna?)
end
# Expects @entry.is_dna? to equal true.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# statement assigning the entry type is not visible -- verify.
50 test "#is_dna? with dna sequence type returns true" do
52 assert(@entry.is_dna? == true)
# Expects #is_rna? to be false; per the test name @entry has no type assigned
# (how @entry is created is not visible in this excerpt).
# assert_equal is used so a failure reports the value actually returned.
test "#is_rna? with no sequence type returns false" do
  assert_equal(false, @entry.is_rna?)
end
# Expects @entry.is_rna? to equal true.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# statement assigning the entry type is not visible -- verify.
59 test "#is_rna? with rna sequence type returns true" do
61 assert(@entry.is_rna? == true)
# Expects #is_protein? to be false; per the test name @entry has no type
# assigned (how @entry is created is not visible in this excerpt).
# assert_equal is used so a failure reports the value actually returned.
test "#is_protein? with no sequence type returns false" do
  assert_equal(false, @entry.is_protein?)
end
# After the type is set to :protein, #is_protein? must return exactly true.
test "#is_protein? with protein sequence type returns true" do
  @entry.type = :protein
  answer = @entry.is_protein?
  assert_equal(true, answer)
end
# Calling #type_guess on @entry without assigning a sequence in this test is
# expected to raise SeqError.
test "#type_guess without sequence raises" do
  assert_raise(SeqError) do
    @entry.type_guess
  end
end
# A sequence containing the residue 'F' is expected to be guessed as :protein.
test "#type_guess with protein returns protein" do
  @entry.seq = 'atcatcrFgatcg'
  guessed = @entry.type_guess
  assert_equal(:protein, guessed)
end
# A sequence containing the residue 'U' is expected to be guessed as :rna.
test "#type_guess with rna returns rna" do
  @entry.seq = 'atcatcrUgatcg'
  guessed = @entry.type_guess
  assert_equal(:rna, guessed)
end
# A sequence made only of a/t/c/g is expected to be guessed as :dna.
test "#type_guess with dna returns dna" do
  @entry.seq = 'atcatcgatcg'
  guessed = @entry.type_guess
  assert_equal(:dna, guessed)
end
# Calling #type_guess! on @entry without assigning a sequence in this test is
# expected to raise SeqError.
test "#type_guess! without sequence raises" do
  assert_raise(SeqError) do
    @entry.type_guess!
  end
end
# Assigns a sequence containing 'F' and expects @entry.type to be :protein.
# NOTE(review): embedded numbering skips a line between the two statements;
# presumably the #type_guess! call itself is not visible -- verify.
96 test "#type_guess! with protein returns protein" do
97 @entry.seq = 'atcatcrFgatcg'
99 assert_equal(:protein, @entry.type)
# Assigns a sequence containing 'U' and expects @entry.type to be :rna.
# NOTE(review): embedded numbering skips a line between the two statements;
# presumably the #type_guess! call itself is not visible -- verify.
102 test "#type_guess! with rna returns rna" do
103 @entry.seq = 'atcatcrUgatcg'
105 assert_equal(:rna, @entry.type)
# Assigns an a/t/c/g-only sequence and expects @entry.type to be :dna.
# NOTE(review): embedded numbering skips a line between the two statements;
# presumably the #type_guess! call itself is not visible -- verify.
108 test "#type_guess! with dna returns dna" do
109 @entry.seq = 'atcatcgatcg'
111 assert_equal(:dna, @entry.type)
# Expects @entry.length to be 4.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# sequence assignment is not visible -- verify.
# NOTE(review): the test name misspells "correctly"; left as-is here because
# renaming changes the test's identifier.
114 test "#length returns corretly" do
116 assert_equal(4, @entry.length)
# Assigns "ATCG" followed by the four characters '.', '-', '~' and '_' and
# expects #indels to report 4.
test "#indels returns correctly" do
  @entry.seq = 'ATCG.-~_'
  indel_count = @entry.indels
  assert_equal(4, indel_count)
end
# Expects #to_rna to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; unseen
# statement(s) may exist -- verify.
124 test "#to_rna with no sequence raises" do
126 assert_raise(SeqError) { @entry.to_rna }
# Expects #to_rna to raise SeqError.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the sequence/type setup is not visible -- verify.
129 test "#to_rna with bad type raises" do
132 assert_raise(SeqError) { @entry.to_rna }
# Expects #to_rna to turn 'ATCGatcg' into "AUCGaucg" (T/t replaced by U/u,
# case kept).
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# type assignment is not visible -- verify.
135 test "#to_rna transcribes correctly" do
136 @entry.seq = 'ATCGatcg'
138 assert_equal("AUCGaucg", @entry.to_rna)
# Expects @entry.type to be :rna at the end of the test.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the type assignment and the #to_rna call are not visible -- verify.
141 test "#to_rna changes entry type to rna" do
142 @entry.seq = 'ATCGatcg'
145 assert_equal(:rna, @entry.type)
# Expects #to_dna to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; unseen
# statement(s) may exist -- verify.
148 test "#to_dna with no sequence raises" do
150 assert_raise(SeqError) { @entry.to_dna }
# Expects #to_dna to raise SeqError.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the sequence/type setup is not visible -- verify.
153 test "#to_dna with bad type raises" do
156 assert_raise(SeqError) { @entry.to_dna }
# Expects #to_dna to turn 'AUCGaucg' into "ATCGatcg" (U/u replaced by T/t,
# case kept).
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# type assignment is not visible -- verify.
159 test "#to_dna transcribes correctly" do
160 @entry.seq = 'AUCGaucg'
162 assert_equal("ATCGatcg", @entry.to_dna)
# Expects @entry.type to be :dna at the end of the test.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the type assignment and the #to_dna call are not visible -- verify.
165 test "#to_dna changes entry type to dna" do
166 @entry.seq = 'AUCGaucg'
169 assert_equal(:dna, @entry.type)
# Expects #to_bp to return a hash with :SEQ_NAME, :SEQ and :SEQ_LEN keys.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of the "ATCG" sequence is not visible -- verify.
172 test "#to_bp returns correct record" do
173 @entry.seq_name = 'test'
175 assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
# Expects #to_bp to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# sequence assignment is not visible -- verify.
178 test "#to_bp with missing seq_name raises" do
180 assert_raise(SeqError) { @entry.to_bp }
# With only seq_name assigned in this test, #to_bp is expected to raise SeqError.
test "#to_bp with missing sequence raises" do
  @entry.seq_name = 'test'
  assert_raise(SeqError) do
    @entry.to_bp
  end
end
# Expects #to_fasta to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# sequence assignment is not visible -- verify.
188 test "#to_fasta with missing seq_name raises" do
190 assert_raise(SeqError) { @entry.to_fasta }
# Expects #to_fasta to raise SeqError.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the (empty) seq_name and sequence assignments are not visible -- verify.
193 test "#to_fasta with empty seq_name raises" do
196 assert_raise(SeqError) { @entry.to_fasta }
# With only seq_name assigned in this test, #to_fasta is expected to raise
# SeqError.
test "#to_fasta with missing seq raises" do
  @entry.seq_name = 'test'
  assert_raise(SeqError) do
    @entry.to_fasta
  end
end
# Expects #to_fasta to raise SeqError after seq_name is set.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of an empty sequence is not visible -- verify.
204 test "#to_fasta with empty seq raises" do
205 @entry.seq_name = 'test'
207 assert_raise(SeqError) { @entry.to_fasta }
# Expects #to_fasta to return ">test\nATCG\n": a '>' header line with the name,
# then the sequence, each newline-terminated.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of the "ATCG" sequence is not visible -- verify.
210 test "#to_fasta returns correct entry" do
211 @entry.seq_name = 'test'
213 assert_equal(">test\nATCG\n", @entry.to_fasta)
# Calling #to_fasta(2) on a four-residue entry is expected to split the
# sequence into two lines of two residues each.
test "#to_fasta wraps correctly" do
  wrapped = Seq.new("test", "ATCG").to_fasta(2)
  assert_equal(">test\nAT\nCG\n", wrapped)
end
# Expects #to_fastq to return "@test\nATCG\n+\nhhhh\n": name line, sequence,
# a '+' separator line and the quality string.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the sequence and quality assignments are not visible -- verify.
221 test "#to_fastq returns correct entry" do
222 @entry.seq_name = 'test'
225 assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
# #to_key is expected to raise SeqError for the sequence "AUCG" (the test name
# calls this a bad residue).
test "#to_key with bad residue raises" do
  with_u = Seq.new("test", "AUCG")
  assert_raise(SeqError) do
    with_u.to_key
  end
end
# #to_key on the sequence "ATCG" is expected to return the integer 54.
test "#to_key returns correctly" do
  key = Seq.new("test", "ATCG").to_key
  assert_equal(54, key)
end
# Expects #reverse to return a new entry holding "GCTA" while @entry.seq
# stays "ATCG".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# sequence assignment is not visible -- verify.
238 test "#reverse returns correctly" do
240 new_entry = @entry.reverse
241 assert_equal("GCTA", new_entry.seq)
242 assert_equal("ATCG", @entry.seq)
# Expects @entry.seq to be "GCTA" at the end of the test.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the sequence assignment and the #reverse! call are not visible -- verify.
245 test "#reverse! returns correctly" do
248 assert_equal("GCTA", @entry.seq)
# Expects #complement to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; unseen
# statement(s) may exist -- verify.
251 test "#complement with no sequence raises" do
253 assert_raise(SeqError) { @entry.complement }
# Expects #complement to raise SeqError once the type is :protein.
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# sequence assignment is not visible -- verify.
256 test "#complement with bad type raises" do
258 @entry.type = :protein
259 assert_raise(SeqError) { @entry.complement }
# Expects #complement to return an entry holding "TAGCtagc" (case kept) and to
# leave @entry.seq as "ATCGatcg".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# type assignment is not visible -- verify.
262 test "#complement for DNA is correct" do
263 @entry.seq = 'ATCGatcg'
265 comp = @entry.complement
266 assert_equal("TAGCtagc", comp.seq)
267 assert_equal("ATCGatcg", @entry.seq)
# Expects #complement to return an entry holding "UAGCuagc" (case kept) and to
# leave @entry.seq as "AUCGaucg".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# type assignment is not visible -- verify.
270 test "#complement for RNA is correct" do
271 @entry.seq = 'AUCGaucg'
273 comp = @entry.complement
274 assert_equal("UAGCuagc", comp.seq)
275 assert_equal("AUCGaucg", @entry.seq)
# Expects #complement! to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; unseen
# statement(s) may exist -- verify.
278 test "#complement! with no sequence raises" do
280 assert_raise(SeqError) { @entry.complement! }
# Expects #complement! to raise SeqError once the type is :protein.
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# sequence assignment is not visible -- verify.
283 test "#complement! with bad type raises" do
285 @entry.type = :protein
286 assert_raise(SeqError) { @entry.complement! }
# Expects the entry returned by #complement! to hold "TAGCtagc".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# type assignment is not visible -- verify.
289 test "#complement! for DNA is correct" do
290 @entry.seq = 'ATCGatcg'
292 assert_equal("TAGCtagc", @entry.complement!.seq)
# Expects the entry returned by #complement! to hold "UAGCuagc".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# type assignment is not visible -- verify.
295 test "#complement! for RNA is correct" do
296 @entry.seq = 'AUCGaucg'
298 assert_equal("UAGCuagc", @entry.complement!.seq)
# "ATCG" and "atgg" differ at one position when letter case is disregarded;
# the test expects #hamming_distance to return 1 for this pair.
test "#hamming distance returns correctly" do
  upper = Seq.new("test1", "ATCG")
  lower = Seq.new("test2", "atgg")
  distance = upper.hamming_distance(lower)
  assert_equal(1, distance)
end
# #generate is expected to raise SeqError for a negative and for a zero length.
test "#generate with length < 1 raises" do
  [-10, 0].each do |bad_length|
    assert_raise(SeqError) { @entry.generate(bad_length, :dna) }
  end
end
# #generate is expected to raise SeqError when the type argument is the
# string "foo".
test "#generate with bad type raises" do
  assert_raise(SeqError) do
    @entry.generate(10, "foo")
  end
end
# #generate must not raise for any of the types :dna, :rna and :protein.
test "#generate with ok type dont raise" do
  [:dna, :rna, :protein].each do |type|
    assert_nothing_raised { @entry.generate(10, type) }
  end
end
# #shuffle is expected to return an entry whose sequence differs from the
# input while leaving @entry.seq untouched. A separate copy of the reference
# string is assigned so the comparison does not share one String object.
test "#shuffle returns correctly" do
  orig = "actgactgactgatcgatcgatcgatcgtactg"
  @entry.seq = orig.dup
  shuffled = @entry.shuffle
  assert_equal(orig, @entry.seq)
  assert_not_equal(@entry.seq, shuffled.seq)
end
# Reads the sequence before calling #shuffle! and expects the sequence of the
# returned entry to differ from it.
test "#shuffle! returns correctly" do
  @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
  before = @entry.seq
  after = @entry.shuffle!.seq
  assert_not_equal(before, after)
end
# Adding two entries built from name and sequence only is expected to give an
# entry with the joined sequence and nil seq_name, type and qual.
test "#+ without qual returns correctly" do
  left = Seq.new("test1", "at")
  right = Seq.new("test2", "cg")
  joined = left + right

  assert_nil(joined.seq_name)
  assert_equal("atcg", joined.seq)
  assert_nil(joined.type)
  assert_nil(joined.qual)
end
# Adding two :dna entries that both carry quality strings is expected to give
# an entry with joined sequence and quality, type :dna and a nil seq_name.
test "#+ with qual returns correctly" do
  left = Seq.new("test1", "at", :dna, "II")
  right = Seq.new("test2", "cg", :dna, "JJ")
  joined = left + right

  assert_nil(joined.seq_name)
  assert_equal("atcg", joined.seq)
  assert_equal(:dna, joined.type)
  assert_equal("IIJJ", joined.qual)
end
# Expects appending a :dna entry to @entry with << to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably
# @entry is given a different type there -- verify.
352 test "#<< with different types raises" do
354 assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna) }
# Expects << to raise SeqError both when the appended entry has a quality
# string and when it has none.
# NOTE(review): embedded numbering skips lines before each assertion;
# presumably @entry's type/qual is set up differently for the two cases -- verify.
357 test "#<< with missing qual in one entry raises" do
360 assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna, "IIII") }
362 assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna) }
# Expects appending an entry built without type or qual not to raise.
# NOTE(review): embedded numbering skips a line inside this test; presumably
# @entry setup is not visible -- verify.
365 test "#<< with nil qual in both entries dont raise" do
367 assert_nothing_raised { @entry << Seq.new("test", "atcg") }
# Expects appending a :dna entry with a quality string not to raise.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably @entry's sequence, type and qual setup is not visible -- verify.
370 test "#<< with qual in both entries dont raise" do
374 assert_nothing_raised { @entry << Seq.new("test", "atcg", :dna, "IIII") }
# Expects @entry.seq to be "atcgATCG" after appending an entry with "ATCG".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# initial "atcg" sequence assignment is not visible -- verify.
377 test "#<< without qual returns correctly" do
379 @entry << Seq.new("test", "ATCG")
380 assert_equal("atcgATCG", @entry.seq)
# Expects both the sequence and the quality string of @entry to be extended by
# those of the appended entry.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the initial sequence/type/qual ("atcg"/"HHHH") setup is not
# visible -- verify.
383 test "#<< with qual returns correctly" do
387 @entry << Seq.new("test", "ATCG", :dna, "IIII")
388 assert_equal("atcgATCG", @entry.seq)
389 assert_equal("HHHHIIII", @entry.qual)
# Expects `e` to carry the name and type of `entry` plus the single residue "c"
# and quality "H", while `entry` itself keeps its full sequence and quality.
# NOTE(review): the statement defining `e` is not visible (embedded numbering
# skips lines after the constructor); presumably an entry[...] lookup -- verify.
392 test "#[] with qual returns correctly" do
393 entry = Seq.new("test", "atcg", :dna, "FGHI")
397 assert_equal("test", e.seq_name)
398 assert_equal("c", e.seq)
399 assert_equal(:dna, e.type)
400 assert_equal("H", e.qual)
401 assert_equal("atcg", entry.seq)
402 assert_equal("FGHI", entry.qual)
# Expects `e` to carry the name of `entry` plus the single residue "c", while
# `entry` keeps its full sequence.
# NOTE(review): the statement defining `e` is not visible, and the numbering
# also skips a line between the last two assertions -- verify.
405 test "#[] without qual returns correctly" do
406 entry = Seq.new("test", "atcg")
410 assert_equal("test", e.seq_name)
411 assert_equal("c", e.seq)
413 assert_equal("atcg", entry.seq)
# Assigns a one-residue entry at index 0 and expects the first residue and
# first quality character of `entry` to be replaced, with name and type kept.
# NOTE(review): embedded numbering skips single lines here; the visible
# statements read as complete, so these may just be blank lines -- verify.
416 test "[]= with qual returns correctly" do
417 entry = Seq.new("test", "atcg", :dna, "FGHI")
419 entry[0] = Seq.new("foo", "T", :dna, "I")
421 assert_equal("test", entry.seq_name)
422 assert_equal("Ttcg", entry.seq)
423 assert_equal(:dna, entry.type)
424 assert_equal("IGHI", entry.qual)
# Assigns a one-residue entry at index 0 and expects the first residue of
# `entry` to be replaced while its name is kept.
# NOTE(review): embedded numbering skips single lines here; the visible
# statements read as complete, so these may just be blank lines -- verify.
427 test "[]= without qual returns correctly" do
428 entry = Seq.new("test", "atcg")
430 entry[0] = Seq.new("foo", "T")
432 assert_equal("test", entry.seq_name)
433 assert_equal("Ttcg", entry.seq)
# Expects #subseq(-1, 1) to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# sequence assignment is not visible -- verify.
436 test "#subseq with start < 0 raises" do
438 assert_raise(SeqError) { @entry.subseq(-1, 1) }
# Expects #subseq(0, 5) to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of a sequence shorter than 5 is not visible -- verify.
441 test "#subseq with start plus length gt seq raises" do
443 assert_raise(SeqError) { @entry.subseq(0, 5) }
# Expects #subseq(start, length) to return "AT" for (0, 2) and "CG" for (2, 2).
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# "ATCG" sequence assignment is not visible -- verify.
446 test "#subseq returns correct sequence" do
448 assert_equal("AT", @entry.subseq(0, 2).seq)
449 assert_equal("CG", @entry.subseq(2, 2).seq)
# Expects #subseq with only a start to return everything from that position:
# "ATCG" for 0 and "CG" for 2.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# "ATCG" sequence assignment is not visible -- verify.
452 test "#subseq without length returns correct sequence" do
454 assert_equal("ATCG", @entry.subseq(0).seq)
455 assert_equal("CG", @entry.subseq(2).seq)
# Expects the quality string to be sliced the same way as the sequence:
# "ab" for (0, 2) and "cd" for (2, 2).
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the sequence and "abcd" qual assignments are not visible -- verify.
458 test "#subseq returns correct qual" do
461 assert_equal("ab", @entry.subseq(0, 2).qual)
462 assert_equal("cd", @entry.subseq(2, 2).qual)
# Expects #subseq with only a start to return the quality string from that
# position on: "abcd" for 0 and "cd" for 2.
# NOTE(review): embedded numbering skips two lines inside this test; presumably
# the sequence and "abcd" qual assignments are not visible -- verify.
465 test "#subseq without length returns correct qual" do
468 assert_equal("abcd", @entry.subseq(0).qual)
469 assert_equal("cd", @entry.subseq(2).qual)
# Expects #subseq!(-1, 1) to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# sequence assignment is not visible -- verify.
472 test "#subseq! with start < 0 raises" do
474 assert_raise(SeqError) { @entry.subseq!(-1, 1) }
# Expects #subseq!(0, 5) to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of a sequence shorter than 5 is not visible -- verify.
477 test "#subseq! with start plus length > seq.length raises" do
479 assert_raise(SeqError) { @entry.subseq!(0, 5) }
# Expects @entry.seq to be "AT" at the first checkpoint and "CG" at the second.
# NOTE(review): only the assertions are visible; the sequence assignments and
# #subseq! calls that precede each of them are skipped by the embedded
# numbering -- verify against the full file.
482 test "#subseq! returns correct sequence" do
485 assert_equal("AT", @entry.seq)
488 assert_equal("CG", @entry.seq)
# Expects @entry.seq to be "ATCG" at the first checkpoint and "CG" at the second.
# NOTE(review): only the assertions are visible; the sequence assignments and
# #subseq! calls that precede each of them are skipped by the embedded
# numbering -- verify against the full file.
491 test "#subseq! without length returns correct sequence" do
494 assert_equal("ATCG", @entry.seq)
497 assert_equal("CG", @entry.seq)
# Expects @entry.qual to be "ab" at the first checkpoint and "cd" at the second.
# NOTE(review): only the assertions are visible; the sequence/qual assignments
# and #subseq! calls that precede each of them are skipped by the embedded
# numbering -- verify against the full file.
500 test "#subseq! with pos and length returns correct qual" do
504 assert_equal("ab", @entry.qual)
508 assert_equal("cd", @entry.qual)
# Expects @entry.qual to be "abcd" at the first checkpoint and "cd" at the second.
# NOTE(review): only the assertions are visible; the sequence/qual assignments
# and #subseq! calls that precede each of them are skipped by the embedded
# numbering -- verify against the full file.
511 test "#subseq! with pos returns correct qual" do
515 assert_equal("abcd", @entry.qual)
519 assert_equal("cd", @entry.qual)
# Expects #subseq_rand(4) to return an entry holding "ATCG".
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of a four-residue "ATCG" sequence (which would make the random
# slice deterministic) is not visible -- verify.
522 test "#subseq_rand returns correct sequence" do
524 assert_equal("ATCG", @entry.subseq_rand(4).seq)
# Expects #indels_remove to strip '-', '.' and '~' from the sequence.
# NOTE(review): embedded numbering skips a line inside this test; unseen
# statement(s) may exist -- verify.
527 test "#indels_remove without qual returns correctly" do
528 @entry.seq = "A-T.CG~CG"
530 assert_equal("ATCGCG", @entry.indels_remove.seq)
# #indels_remove is expected to drop the '-', '.' and '~' positions from the
# sequence together with the quality characters at the same positions.
# The method is invoked once per assertion.
test "#indels_remove with qual returns correctly" do
  @entry.seq = "A-T.CG~CG"
  @entry.qual = "a@b@cd@fg"

  cleaned_seq = @entry.indels_remove.seq
  assert_equal("ATCGCG", cleaned_seq)

  cleaned_qual = @entry.indels_remove.qual
  assert_equal("abcdfg", cleaned_qual)
end
# #composition is expected to give per-residue counts for "AAAATTTCCG" and 0
# for a residue ("X") that does not occur. It is called once per lookup.
test "#composition returns correctly" do
  @entry.seq = "AAAATTTCCG"

  [["A", 4], ["T", 3], ["C", 2], ["G", 1], ["X", 0]].each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end
# Expects #homopol_max to return 0.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of an empty sequence is not visible -- verify.
549 test "#homopol_max returns 0 with empty sequence" do
551 assert_equal(0, @entry.homopol_max)
# Expects #homopol_max to return 0.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# assignment of a nil sequence is not visible -- verify.
554 test "#homopol_max returns 0 with nil sequence" do
556 assert_equal(0, @entry.homopol_max)
# The longest same-letter run in the sequence (ignoring case) is the five
# n's at the end; #homopol_max(6) is expected to return 0.
test "#homopol_max returns 0 when not found" do
  @entry.seq = "AtTcCcGggGnnNnn"
  longest = @entry.homopol_max(6)
  assert_equal(0, longest)
end
# The longest same-letter run in the sequence (ignoring case) is the five
# n's at the end; #homopol_max(3) is expected to return 5.
test "#homopol_max returns correctly" do
  @entry.seq = "AtTcCcGggGnnNnn"
  longest = @entry.homopol_max(3)
  assert_equal(5, longest)
end
# Expects #hard_mask to report 33.33 for "--AAAANn".
# NOTE(review): presumably the percentage of N/n among the six non-indel
# residues -- confirm against Seq#hard_mask.
# The result is a Float, so it is compared with a tolerance of half a unit in
# the second decimal instead of exact equality.
test "#hard_mask returns correctly" do
  @entry.seq = "--AAAANn"
  assert_in_delta(33.33, @entry.hard_mask, 0.005)
end
# Expects #soft_mask to report 25.00 for "--AAAa".
# NOTE(review): presumably the percentage of lower-case residues among the
# four non-indel residues -- confirm against Seq#soft_mask.
# The result is a Float, so it is compared with a tolerance of half a unit in
# the second decimal instead of exact equality.
test "#soft_mask returns correctly" do
  @entry.seq = "--AAAa"
  assert_in_delta(25.00, @entry.soft_mask, 0.005)
end
# Expects #mask_seq_hard!(20) to raise SeqError.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the nil seq / qual setup is not visible -- verify.
579 test "#mask_seq_hard! with nil seq raises" do
583 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# Expects #mask_seq_hard!(20) to raise SeqError.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the seq / nil qual setup is not visible -- verify.
586 test "#mask_seq_hard! with nil qual raises" do
590 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
# #mask_seq_hard! is expected to raise SeqError for the cutoffs -1 and 41.
test "#mask_seq_hard! with bad cutoff raises" do
  [-1, 41].each do |bad_cutoff|
    assert_raise(SeqError) { @entry.mask_seq_hard!(bad_cutoff) }
  end
end
# Expects #mask_seq_hard! not to raise for the cutoffs 0 and 40.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the seq / qual setup is not visible -- verify.
598 test "#mask_seq_hard! with OK cutoff dont raise" do
602 assert_nothing_raised { @entry.mask_seq_hard!(0) }
603 assert_nothing_raised { @entry.mask_seq_hard!(40) }
# Expects #mask_seq_hard!(20) to return an entry whose sequence is "-NNCG".
# NOTE(review): embedded numbering skips lines around the qual assignment;
# presumably the sequence (and possibly type) setup is not visible -- verify.
606 test "#mask_seq_hard! returns correctly" do
608 @entry.qual = "33456"
610 assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
# Expects #mask_seq_soft!(20) to raise SeqError.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the nil seq / qual setup is not visible -- verify.
613 test "#mask_seq_soft! with nil seq raises" do
617 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# Expects #mask_seq_soft!(20) to raise SeqError.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the seq / nil qual setup is not visible -- verify.
620 test "#mask_seq_soft! with nil qual raises" do
624 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
# #mask_seq_soft! is expected to raise SeqError for the cutoffs -1 and 41.
test "#mask_seq_soft! with bad cutoff raises" do
  [-1, 41].each do |bad_cutoff|
    assert_raise(SeqError) { @entry.mask_seq_soft!(bad_cutoff) }
  end
end
# Expects #mask_seq_soft! not to raise for the cutoffs 0 and 40.
# NOTE(review): embedded numbering skips three lines inside this test;
# presumably the seq / qual setup is not visible -- verify.
632 test "#mask_seq_soft! with OK cutoff dont raise" do
636 assert_nothing_raised { @entry.mask_seq_soft!(0) }
637 assert_nothing_raised { @entry.mask_seq_soft!(40) }
# Expects #mask_seq_soft!(20) to return an entry whose sequence is "-atCG".
# NOTE(review): embedded numbering skips lines around the qual assignment;
# presumably the sequence (and possibly type) setup is not visible -- verify.
640 test "#mask_seq_soft! returns correctly" do
642 @entry.qual = "33456"
644 assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
647 # Quality score encoding detection and validation (base 33 / base 64)
# Expects #qual_base33? to be true for a quality string made of the
# characters '!' through ':' and false in two further cases.
# NOTE(review): embedded numbering skips a line before each of the two `false`
# assertions; presumably the qual reassignments are not visible -- verify.
649 test "#qual_base33? returns correctly" do
650 # self.qual.match(/[!-:]/)
651 @entry.qual = '!"#$%&\'()*+,-./0123456789:'
652 assert_equal(true, @entry.qual_base33? )
654 assert_equal(false, @entry.qual_base33? )
656 assert_equal(false, @entry.qual_base33? )
# Expects #qual_base64? to be true for a quality string made of the
# characters 'K' through 'h' and false in two further cases.
# NOTE(review): embedded numbering skips a line before the first `false`
# assertion; presumably a qual reassignment is not visible -- verify.
659 test "#qual_base64? returns correctly" do
660 # self.qual.match(/[K-h]/)
661 @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
662 assert_equal(true, @entry.qual_base64? )
664 assert_equal(false, @entry.qual_base64? )
# 105.chr is "i", the character directly after "h".
665 @entry.qual = 105.chr
666 assert_equal(false, @entry.qual_base64? )
# Without a quality string assigned in this test, #qual_valid? is expected to
# raise SeqError for both the :base_33 and the :base_64 encoding.
test "#qual_valid? with nil qual raises" do
  [:base_33, :base_64].each do |encoding|
    assert_raise(SeqError) { @entry.qual_valid?(encoding) }
  end
end
# Expects #qual_valid? to raise SeqError when given the string "foobar" as
# encoding.
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# qual assignment is not visible -- verify.
674 test "#qual_valid? with bad encoding raises" do
676 assert_raise(SeqError) { @entry.qual_valid?("foobar") }
# For each encoding, builds a quality string holding every character from
# Seq::SCORE_MIN + offset to Seq::SCORE_MAX + offset (offset 33 or 64) and
# expects #qual_valid? to return true for it.
test "#qual_valid? with OK range returns correctly" do
  [[33, :base_33], [64, :base_64]].each do |offset, encoding|
    lowest = (Seq::SCORE_MIN + offset).chr
    highest = (Seq::SCORE_MAX + offset).chr
    @entry.qual = (lowest .. highest).to_a.join
    assert_equal(true, @entry.qual_valid?(encoding))
  end
end
# For each encoding, extends the valid character range by one character below
# the minimum and, separately, by one above the maximum, and expects
# #qual_valid? to return false in all four cases.
# NOTE(review): embedded numbering skips one line between the base 33 and
# base 64 halves; the visible statements read as complete, so it may just be a
# blank line -- verify.
686 test "#qual_valid? with bad range returns correctly" do
687 @entry.qual = ((Seq::SCORE_MIN + 33 - 1).chr .. (Seq::SCORE_MAX + 33).chr).to_a.join
688 assert_equal(false, @entry.qual_valid?(:base_33))
689 @entry.qual = ((Seq::SCORE_MIN + 33).chr .. (Seq::SCORE_MAX + 33 + 1).chr).to_a.join
690 assert_equal(false, @entry.qual_valid?(:base_33))
692 @entry.qual = ((Seq::SCORE_MIN + 64 - 1).chr .. (Seq::SCORE_MAX + 64).chr).to_a.join
693 assert_equal(false, @entry.qual_valid?(:base_64))
694 @entry.qual = ((Seq::SCORE_MIN + 64).chr .. (Seq::SCORE_MAX + 64 + 1).chr).to_a.join
695 assert_equal(false, @entry.qual_valid?(:base_64))
698 # Quality score conversion between the base 33 and base 64 encodings
# Converting from :base_33 to :base_33 is expected to leave the quality
# string unchanged.
test "#qual_convert! from base33 to base33 returns OK" do
  @entry.qual = 'BCDEFGHI'
  converted = @entry.qual_convert!(:base_33, :base_33)
  assert_equal('BCDEFGHI', converted.qual)
end
# Converting from :base_33 to :base_64 is expected to turn 'BCDEFGHI' into
# 'abcdefgh' (each character moved up by 31 code points).
test "#qual_convert! from base33 to base64 returns OK" do
  @entry.qual = 'BCDEFGHI'
  converted = @entry.qual_convert!(:base_33, :base_64)
  assert_equal('abcdefgh', converted.qual)
end
# Converting from :base_64 to :base_64 is expected to leave the quality
# string unchanged.
test "#qual_convert! from base64 to base64 returns OK" do
  @entry.qual = 'BCDEFGHI'
  converted = @entry.qual_convert!(:base_64, :base_64)
  assert_equal('BCDEFGHI', converted.qual)
end
# Converting from :base_64 to :base_33 is expected to turn 'abcdefgh' into
# 'BCDEFGHI' (each character moved down by 31 code points).
test "#qual_convert! from base64 to base33 returns OK" do
  @entry.qual = 'abcdefgh'
  converted = @entry.qual_convert!(:base_64, :base_33)
  assert_equal('BCDEFGHI', converted.qual)
end
# Feeds every character from '!' to '~' through #qual_coerce! for each
# encoding and compares the result with a fixed expected string. A fresh input
# string is built before each call.
# NOTE(review): the expected base 64 string maps '[' and ']' to 'h' but leaves
# '\' and '^' alone, and keeps the characters below '@' unchanged -- this
# looks like it pins an implementation quirk; confirm the intent against
# Seq#qual_coerce!.
test "#qual_coerce! returns correctly" do
  expected_base33 = "!\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
  expected_base64 = "!\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZh\\h^_`abcdefghhhhhhhhhhhhhhhhhhhhhhh"

  @entry.qual = ('!' .. '~').to_a.join
  assert_equal(expected_base33, @entry.qual_coerce!(:base_33).qual)

  @entry.qual = ('!' .. '~').to_a.join
  assert_equal(expected_base64, @entry.qual_coerce!(:base_64).qual)
end
# Expects #scores_mean to raise SeqError.
# NOTE(review): embedded numbering skips a line inside this test; presumably a
# statement clearing or omitting the quality string is not visible -- verify.
727 test "#scores_mean without qual raises" do
729 assert_raise(SeqError) { @entry.scores_mean }
# Expects #scores_mean to return 20.0.
# NOTE(review): embedded numbering skips a line inside this test; presumably the
# quality string assignment is not visible -- verify.
# NOTE(review): this compares a Float with exact equality; a tolerance-based
# assertion may be more robust once the hidden setup is confirmed.
732 test "#scores_mean returns correctly" do
734 assert_equal(20.0, @entry.scores_mean)