# Put the directory two levels above this file's directory at the front of
# Ruby's load path ($:).
2 $:.unshift File.join(File.dirname(__FILE__), '..', '..')
4 # Copyright (C) 2011 Martin A. Hansen.
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # This software is part of the Biopieces framework (www.biopieces.org).
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
32 class TestSeq < Test::Unit::TestCase
# Seq.new is expected to raise SeqError when qual ("hhh", 3 characters)
# and seq ("ATCG", 4 characters) differ in length.
37 test "Seq.new with differnet length SEQ and SCORES raises" do
38 assert_raise(SeqError) { Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "hhh") }
# Seq.new_bp takes a record hash keyed by :SEQ_NAME, :SEQ, :SEQ_TYPE and
# :SCORES; the resulting object must expose those values through seq_name,
# seq, type and qual respectively.
41 test "Seq.new_bp returns correctly" do
42 record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => :dna, :SCORES => "hhhh"}
43 seq = Seq.new_bp(record)
44 assert_equal("test", seq.seq_name)
45 assert_equal("ATCG", seq.seq)
46 assert_equal(:dna, seq.type)
47 assert_equal("hhhh", seq.qual)
# is_dna? is expected to be false when the test assigns no type to @entry and
# true for a dna-typed entry.
# NOTE(review): the construction of @entry and the statement giving it a dna
# type are not visible in this excerpt — presumably a setup method and a type
# assignment; confirm.
50 test "#is_dna? with no sequence type returns false" do
51 assert(@entry.is_dna? == false)
54 test "#is_dna? with dna sequence type returns true" do
56 assert(@entry.is_dna? == true)
# is_rna? is expected to be false when the test assigns no type to @entry and
# true for an rna-typed entry.
# NOTE(review): the statement giving @entry an rna type in the second case is
# not visible in this excerpt — confirm it is present.
59 test "#is_rna? with no sequence type returns false" do
60 assert(@entry.is_rna? == false)
63 test "#is_rna? with rna sequence type returns true" do
65 assert(@entry.is_rna? == true)
# is_protein? is expected to be false when the test assigns no type to @entry
# and true once @entry.type has been set to :protein.
68 test "#is_protein? with no sequence type returns false" do
69 assert(@entry.is_protein? == false)
72 test "#is_protein? with protein sequence type returns true" do
73 @entry.type = :protein
74 assert_equal(true, @entry.is_protein?)
# type_guess is expected to raise SeqError when the test assigns no sequence,
# and otherwise to return the guessed type as a symbol. The protein and rna
# inputs differ only in one residue (F versus U); the dna input contains only
# a, t, c and g.
77 test "#type_guess without sequence raises" do
78 assert_raise(SeqError) { @entry.type_guess }
81 test "#type_guess with protein returns protein" do
82 @entry.seq = 'atcatcrFgatcg'
83 assert_equal(:protein, @entry.type_guess)
86 test "#type_guess with rna returns rna" do
87 @entry.seq = 'atcatcrUgatcg'
88 assert_equal(:rna, @entry.type_guess)
91 test "#type_guess with dna returns dna" do
92 @entry.seq = 'atcatcgatcg'
93 assert_equal(:dna, @entry.type_guess)
# type_guess! is expected to raise SeqError when the test assigns no sequence.
# The remaining cases use the same inputs as the type_guess tests but assert
# on @entry.type rather than on a return value.
# NOTE(review): in those cases the call to type_guess! between assigning seq
# and the assertion is not visible in this excerpt — confirm it is present.
96 test "#type_guess! without sequence raises" do
97 assert_raise(SeqError) { @entry.type_guess! }
100 test "#type_guess! with protein returns protein" do
101 @entry.seq = 'atcatcrFgatcg'
103 assert_equal(:protein, @entry.type)
106 test "#type_guess! with rna returns rna" do
107 @entry.seq = 'atcatcrUgatcg'
109 assert_equal(:rna, @entry.type)
112 test "#type_guess! with dna returns dna" do
113 @entry.seq = 'atcatcgatcg'
115 assert_equal(:dna, @entry.type)
# length is expected to be 4.
# NOTE(review): the statement assigning the sequence is not visible in this
# excerpt — presumably a 4-residue sequence; confirm.
118 test "#length returns corretly" do
120 assert_equal(4, @entry.length)
# indels is expected to return 4 for 'ATCG.-~_'. That matches the four
# non-letter characters ('.', '-', '~', '_') in the input — presumably these
# are the characters counted as indels.
123 test "#indels returns correctly" do
124 @entry.seq = 'ATCG.-~_'
125 assert_equal(4, @entry.indels)
# to_rna: the two error cases expect SeqError. The success cases expect T/t to
# be replaced by U/u with case preserved ("ATCGatcg" -> "AUCGaucg") and
# @entry.type to end up as :rna.
# NOTE(review): several fixture lines (type/seq assignments and, in the last
# case, the to_rna call itself) are not visible in this excerpt.
128 test "#to_rna with no sequence raises" do
130 assert_raise(SeqError) { @entry.to_rna }
133 test "#to_rna with bad type raises" do
136 assert_raise(SeqError) { @entry.to_rna }
139 test "#to_rna transcribes correctly" do
140 @entry.seq = 'ATCGatcg'
142 assert_equal("AUCGaucg", @entry.to_rna)
145 test "#to_rna changes entry type to rna" do
146 @entry.seq = 'ATCGatcg'
149 assert_equal(:rna, @entry.type)
# to_dna: the two error cases expect SeqError. The success cases expect U/u to
# be replaced by T/t with case preserved ("AUCGaucg" -> "ATCGatcg") and
# @entry.type to end up as :dna.
# NOTE(review): several fixture lines (type/seq assignments and, in the last
# case, the to_dna call itself) are not visible in this excerpt.
152 test "#to_dna with no sequence raises" do
154 assert_raise(SeqError) { @entry.to_dna }
157 test "#to_dna with bad type raises" do
160 assert_raise(SeqError) { @entry.to_dna }
163 test "#to_dna transcribes correctly" do
164 @entry.seq = 'AUCGaucg'
166 assert_equal("ATCGatcg", @entry.to_dna)
169 test "#to_dna changes entry type to dna" do
170 @entry.seq = 'AUCGaucg'
173 assert_equal(:dna, @entry.type)
# to_bp is expected to return a hash with :SEQ_NAME, :SEQ and :SEQ_LEN (here
# "test", "ATCG" and 4), and to raise SeqError when seq_name or the sequence
# is missing.
# NOTE(review): the sequence assignment for the first two cases is not
# visible in this excerpt.
176 test "#to_bp returns correct record" do
177 @entry.seq_name = 'test'
179 assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
182 test "#to_bp with missing seq_name raises" do
184 assert_raise(SeqError) { @entry.to_bp }
187 test "#to_bp with missing sequence raises" do
188 @entry.seq_name = 'test'
189 assert_raise(SeqError) { @entry.to_bp }
# to_fasta is expected to raise SeqError when seq_name or seq is missing or
# empty. On success it returns ">" + seq_name, a newline, the sequence and a
# trailing newline. Called as to_fasta(2) on "ATCG", the sequence is split
# over the lines "AT" and "CG".
# NOTE(review): the assignments producing the empty seq_name / empty seq and
# the "ATCG" fixture are not visible in this excerpt.
192 test "#to_fasta with missing seq_name raises" do
194 assert_raise(SeqError) { @entry.to_fasta }
197 test "#to_fasta with empty seq_name raises" do
200 assert_raise(SeqError) { @entry.to_fasta }
203 test "#to_fasta with missing seq raises" do
204 @entry.seq_name = 'test'
205 assert_raise(SeqError) { @entry.to_fasta }
208 test "#to_fasta with empty seq raises" do
209 @entry.seq_name = 'test'
211 assert_raise(SeqError) { @entry.to_fasta }
214 test "#to_fasta returns correct entry" do
215 @entry.seq_name = 'test'
217 assert_equal(">test\nATCG\n", @entry.to_fasta)
220 test "#to_fasta wraps correctly" do
221 entry = Seq.new(seq_name: "test", seq: "ATCG")
222 assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
# to_fastq is expected to return four newline-terminated lines: "@" followed
# by seq_name, the sequence, "+", and the quality string.
# NOTE(review): the seq and qual assignments are not visible in this excerpt;
# the expected output implies "ATCG" and "hhhh".
225 test "#to_fastq returns correct entry" do
226 @entry.seq_name = 'test'
229 assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
# to_key is expected to raise SeqError for "AUCG" (the test name calls this a
# bad residue) and to return the integer 54 for "ATCG".
# NOTE(review): 54 == 0b00_11_01_10, which is consistent with packing two
# bits per base as A=0, C=1, G=2, T=3 — confirm against the implementation.
232 test "#to_key with bad residue raises" do
233 entry = Seq.new(seq_name: "test", seq: "AUCG")
234 assert_raise(SeqError) { entry.to_key }
237 test "#to_key returns correctly" do
238 entry = Seq.new(seq_name: "test", seq: "ATCG")
239 assert_equal(54, entry.to_key)
# reverse is expected to return a new entry holding the reversed sequence
# ("GCTA") while @entry.seq stays "ATCG". The reverse! case asserts on
# @entry.seq itself.
# NOTE(review): the sequence assignment and the reverse! call are not visible
# in this excerpt.
242 test "#reverse returns correctly" do
244 new_entry = @entry.reverse
245 assert_equal("GCTA", new_entry.seq)
246 assert_equal("ATCG", @entry.seq)
249 test "#reverse! returns correctly" do
252 assert_equal("GCTA", @entry.seq)
# complement is expected to raise SeqError in the no-sequence case and when
# the type is :protein. Otherwise it returns a new entry with every base
# complemented and case preserved (DNA: A<->T, C<->G; RNA: A<->U, C<->G),
# leaving @entry.seq unchanged.
# NOTE(review): some fixture lines (e.g. type assignments for the DNA/RNA
# cases) are not visible in this excerpt.
255 test "#complement with no sequence raises" do
257 assert_raise(SeqError) { @entry.complement }
260 test "#complement with bad type raises" do
262 @entry.type = :protein
263 assert_raise(SeqError) { @entry.complement }
266 test "#complement for DNA is correct" do
267 @entry.seq = 'ATCGatcg'
269 comp = @entry.complement
270 assert_equal("TAGCtagc", comp.seq)
271 assert_equal("ATCGatcg", @entry.seq)
274 test "#complement for RNA is correct" do
275 @entry.seq = 'AUCGaucg'
277 comp = @entry.complement
278 assert_equal("UAGCuagc", comp.seq)
279 assert_equal("AUCGaucg", @entry.seq)
# complement! is expected to raise SeqError in the no-sequence case and when
# the type is :protein. The success cases read .seq off the value returned by
# complement! and expect the complemented sequence with case preserved.
# NOTE(review): some fixture lines (e.g. type assignments for the DNA/RNA
# cases) are not visible in this excerpt.
282 test "#complement! with no sequence raises" do
284 assert_raise(SeqError) { @entry.complement! }
287 test "#complement! with bad type raises" do
289 @entry.type = :protein
290 assert_raise(SeqError) { @entry.complement! }
293 test "#complement! for DNA is correct" do
294 @entry.seq = 'ATCGatcg'
296 assert_equal("TAGCtagc", @entry.complement!.seq)
299 test "#complement! for RNA is correct" do
300 @entry.seq = 'AUCGaucg'
302 assert_equal("UAGCuagc", @entry.complement!.seq)
# hamming_distance must compare case-insensitively: "ATCG" versus "atgg" is
# expected to give 1 (only C/g differs). With ambiguity: true, the "n" in
# "atng" is expected to match the C in "ATCG", dropping the distance from
# 1 to 0.
305 test "#hamming_distance returns correctly" do
306 seq1 = Seq.new(seq: "ATCG")
307 seq2 = Seq.new(seq: "atgg")
308 assert_equal(1, seq1.hamming_distance(seq2))
311 test "#hamming_distance with ambiguity codes return correctly" do
312 seq1 = Seq.new(seq: "ATCG")
313 seq2 = Seq.new(seq: "atng")
315 assert_equal(1, seq1.hamming_distance(seq2))
316 assert_equal(0, seq1.hamming_distance(seq2, ambiguity: true))
# edit_distance between "ATCG" and "tgncg" (sequences of different length and
# case) is expected to be 2.
319 test "#edit_distance returns correctly" do
320 seq1 = Seq.new(seq: "ATCG")
321 seq2 = Seq.new(seq: "tgncg")
322 assert_equal(2, seq1.edit_distance(seq2))
# generate(length, type) is expected to raise SeqError for lengths below 1
# (-10 and 0 are tried) and for the type "foo". The types :dna, :rna and
# :protein with length 10 must not raise.
325 test "#generate with length < 1 raises" do
326 assert_raise(SeqError) { @entry.generate(-10, :dna) }
327 assert_raise(SeqError) { @entry.generate(0, :dna) }
330 test "#generate with bad type raises" do
331 assert_raise(SeqError) { @entry.generate(10, "foo") }
334 test "#generate with ok type dont raise" do
335 %w[dna rna protein].each do |type|
336 assert_nothing_raised { @entry.generate(10, type.to_sym) }
# shuffle is expected to return a new entry whose seq differs from the input
# while @entry.seq stays equal to orig. The shuffle! case compares @entry.seq
# (evaluated before the call) with the seq of the object shuffle! returns.
# NOTE(review): both tests assume a shuffle never reproduces the input order;
# the chance is negligible for this input but not zero, so a failure here can
# be spurious.
340 test "#shuffle returns correctly" do
341 orig = "actgactgactgatcgatcgatcgatcgtactg"
342 @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
343 entry_shuf = @entry.shuffle
344 assert_equal(orig, @entry.seq)
345 assert_not_equal(@entry.seq, entry_shuf.seq)
348 test "#shuffle! returns correctly" do
349 @entry.seq = "actgactgactgatcgatcgatcgatcgtactg"
350 assert_not_equal(@entry.seq, @entry.shuffle!.seq)
# Seq#+ is expected to return an entry whose seq is the concatenation of both
# operands and whose seq_name is nil. When neither operand has type or qual
# the result has nil for both; when both are :dna with qual, the result is
# :dna and its qual is the concatenation ("II" + "JJ").
353 test "#+ without qual returns correctly" do
354 entry = Seq.new(seq_name: "test1", seq: "at") + Seq.new(seq_name: "test2", seq: "cg")
355 assert_nil(entry.seq_name)
356 assert_equal("atcg", entry.seq)
357 assert_nil(entry.type)
358 assert_nil(entry.qual)
361 test "#+ with qual returns correctly" do
362 entry = Seq.new(seq_name: "test1", seq: "at", type: :dna, qual: "II") + Seq.new(seq_name: "test2", seq: "cg", type: :dna, qual: "JJ")
363 assert_nil(entry.seq_name)
364 assert_equal("atcg", entry.seq)
365 assert_equal(:dna, entry.type)
366 assert_equal("IIJJ", entry.qual)
# Seq#<< appends another Seq onto @entry. Per the test names it is expected
# to raise SeqError when the two types differ or when only one side has qual,
# and not to raise when qual is nil on both sides or present on both. On
# success @entry.seq (and @entry.qual) hold the appended values.
# NOTE(review): the statements preparing @entry (seq, type, qual) are not
# visible in this excerpt; the expected values imply @entry starts with seq
# "atcg" and, in the final case, qual "HHHH" — confirm.
369 test "#<< with different types raises" do
371 assert_raise(SeqError) { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna) }
374 test "#<< with missing qual in one entry raises" do
377 assert_raise(SeqError) { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII") }
379 assert_raise(SeqError) { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna) }
382 test "#<< with nil qual in both entries dont raise" do
384 assert_nothing_raised { @entry << Seq.new(seq_name: "test", seq: "atcg") }
387 test "#<< with qual in both entries dont raise" do
391 assert_nothing_raised { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII") }
394 test "#<< without qual returns correctly" do
396 @entry << Seq.new(seq_name: "test", seq: "ATCG")
397 assert_equal("atcgATCG", @entry.seq)
400 test "#<< with qual returns correctly" do
404 @entry << Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "IIII")
405 assert_equal("atcgATCG", @entry.seq)
406 assert_equal("HHHHIIII", @entry.qual)
# Seq#[] is expected to return a new entry e that keeps seq_name (and type)
# and holds only the selected part of seq and qual, while the source entry's
# seq and qual stay untouched.
# NOTE(review): the statement defining e is not visible in this excerpt; the
# expected values ("c" from "atcg", "H" from "FGHI") correspond to index 2.
409 test "#[] with qual returns correctly" do
410 entry = Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
414 assert_equal("test", e.seq_name)
415 assert_equal("c", e.seq)
416 assert_equal(:dna, e.type)
417 assert_equal("H", e.qual)
418 assert_equal("atcg", entry.seq)
419 assert_equal("FGHI", entry.qual)
422 test "#[] without qual returns correctly" do
423 entry = Seq.new(seq_name: "test", seq: "atcg")
427 assert_equal("test", e.seq_name)
428 assert_equal("c", e.seq)
430 assert_equal("atcg", entry.seq)
# Seq#[]= with index 0 and a one-residue Seq on the right-hand side is
# expected to overwrite position 0 of seq (and of qual when present) while the
# target keeps its own seq_name and type.
433 test "[]= with qual returns correctly" do
434 entry = Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
436 entry[0] = Seq.new(seq_name: "foo", seq: "T", type: :dna, qual: "I")
438 assert_equal("test", entry.seq_name)
439 assert_equal("Ttcg", entry.seq)
440 assert_equal(:dna, entry.type)
441 assert_equal("IGHI", entry.qual)
444 test "[]= without qual returns correctly" do
445 entry = Seq.new(seq_name: "test", seq: "atcg")
447 entry[0] = Seq.new(seq_name: "foo", seq: "T")
449 assert_equal("test", entry.seq_name)
450 assert_equal("Ttcg", entry.seq)
# subseq(start, length) is expected to raise SeqError for a negative start and
# when start plus length runs past the sequence. Otherwise it returns an
# entry holding the selected slice of seq and qual; without a length argument
# the slice runs to the end of the sequence.
# NOTE(review): the fixture assignments are not visible in this excerpt; the
# expected values imply seq "ATCG" and qual "abcd".
453 test "#subseq with start < 0 raises" do
455 assert_raise(SeqError) { @entry.subseq(-1, 1) }
458 test "#subseq with start plus length gt seq raises" do
460 assert_raise(SeqError) { @entry.subseq(0, 5) }
463 test "#subseq returns correct sequence" do
465 assert_equal("AT", @entry.subseq(0, 2).seq)
466 assert_equal("CG", @entry.subseq(2, 2).seq)
469 test "#subseq without length returns correct sequence" do
471 assert_equal("ATCG", @entry.subseq(0).seq)
472 assert_equal("CG", @entry.subseq(2).seq)
475 test "#subseq returns correct qual" do
478 assert_equal("ab", @entry.subseq(0, 2).qual)
479 assert_equal("cd", @entry.subseq(2, 2).qual)
482 test "#subseq without length returns correct qual" do
485 assert_equal("abcd", @entry.subseq(0).qual)
486 assert_equal("cd", @entry.subseq(2).qual)
# subseq! is expected to raise SeqError for a negative start and when start
# plus length runs past the sequence. The success cases assert on @entry.seq
# and @entry.qual directly after the call.
# NOTE(review): the fixture assignments and the subseq! calls of the success
# cases are not visible in this excerpt — confirm they are present.
489 test "#subseq! with start < 0 raises" do
491 assert_raise(SeqError) { @entry.subseq!(-1, 1) }
494 test "#subseq! with start plus length > seq.length raises" do
496 assert_raise(SeqError) { @entry.subseq!(0, 5) }
499 test "#subseq! returns correct sequence" do
502 assert_equal("AT", @entry.seq)
505 assert_equal("CG", @entry.seq)
508 test "#subseq! without length returns correct sequence" do
511 assert_equal("ATCG", @entry.seq)
514 assert_equal("CG", @entry.seq)
517 test "#subseq! with pos and length returns correct qual" do
521 assert_equal("ab", @entry.qual)
525 assert_equal("cd", @entry.qual)
528 test "#subseq! with pos returns correct qual" do
532 assert_equal("abcd", @entry.qual)
536 assert_equal("cd", @entry.qual)
# subseq_rand(4) is expected to return an entry whose seq is "ATCG".
# NOTE(review): the fixture assignment is not visible in this excerpt;
# presumably the sequence is itself 4 residues long, so only one
# subsequence of that length exists — confirm.
539 test "#subseq_rand returns correct sequence" do
541 assert_equal("ATCG", @entry.subseq_rand(4).seq)
# indels_remove is expected to strip '-', '.' and '~' from the sequence
# ("A-T.CG~CG" -> "ATCGCG"). When qual is set, the quality characters at the
# same positions are dropped as well ("a@b@cd@fg" -> "abcdfg").
544 test "#indels_remove without qual returns correctly" do
545 @entry.seq = "A-T.CG~CG"
547 assert_equal("ATCGCG", @entry.indels_remove.seq)
550 test "#indels_remove with qual returns correctly" do
551 @entry.seq = "A-T.CG~CG"
552 @entry.qual = "a@b@cd@fg"
553 assert_equal("ATCGCG", @entry.indels_remove.seq)
554 assert_equal("abcdfg", @entry.indels_remove.qual)
# composition is expected to return an object indexable by residue string
# that yields the residue's count in the sequence; a residue that does not
# occur ("X") must yield 0 rather than nil.
557 test "#composition returns correctly" do
558 @entry.seq = "AAAATTTCCG"
559 assert_equal(4, @entry.composition["A"])
560 assert_equal(3, @entry.composition["T"])
561 assert_equal(2, @entry.composition["C"])
562 assert_equal(1, @entry.composition["G"])
563 assert_equal(0, @entry.composition["X"])
# hard_mask and soft_mask are expected to return percentages as floats.
# The expected values are consistent with gaps being excluded from the total:
# 2 of the 6 non-gap residues in "--AAAANn" are N/n (33.33) and 1 of the 4
# non-gap residues in "--AAAa" is lowercase (25.00).
# assert_equal compares floats exactly, so the result is presumably already
# rounded to two decimals — confirm against the implementation.
566 test "#hard_mask returns correctly" do
567 @entry.seq = "--AAAANn"
568 assert_equal(33.33, @entry.hard_mask)
571 test "#soft_mask returns correctly" do
572 @entry.seq = "--AAAa"
573 assert_equal(25.00, @entry.soft_mask)
# mask_seq_hard!(cutoff) is expected to raise SeqError with a nil seq, with a
# nil qual, and for the cutoffs -1 and 41; the cutoffs 0 and 40 must not
# raise. In the final case, qual "33456" with cutoff 20 is expected to give
# the sequence "-NNCG".
# NOTE(review): the seq/qual fixture assignments are not visible in this
# excerpt. Assuming base-33 scores, '3' and '4' decode to 18 and 19 (below the
# cutoff) and '5' and '6' to 20 and 21, which fits the first three positions
# being masked with a leading gap left as '-' — confirm.
576 test "#mask_seq_hard! with nil seq raises" do
580 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
583 test "#mask_seq_hard! with nil qual raises" do
587 assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
590 test "#mask_seq_hard! with bad cutoff raises" do
591 assert_raise(SeqError) { @entry.mask_seq_hard!(-1) }
592 assert_raise(SeqError) { @entry.mask_seq_hard!(41) }
595 test "#mask_seq_hard! with OK cutoff dont raise" do
599 assert_nothing_raised { @entry.mask_seq_hard!(0) }
600 assert_nothing_raised { @entry.mask_seq_hard!(40) }
603 test "#mask_seq_hard! returns correctly" do
605 @entry.qual = "33456"
607 assert_equal("-NNCG", @entry.mask_seq_hard!(20).seq)
# mask_seq_soft!(cutoff) is expected to raise SeqError with a nil seq, with a
# nil qual, and for the cutoffs -1 and 41; the cutoffs 0 and 40 must not
# raise. In the final case, qual "33456" with cutoff 20 is expected to give
# the sequence "-atCG", i.e. masked residues are lowercased.
# NOTE(review): the seq/qual fixture assignments are not visible in this
# excerpt. Assuming base-33 scores, '3' and '4' decode to 18 and 19 (below the
# cutoff) and '5' and '6' to 20 and 21 — confirm.
610 test "#mask_seq_soft! with nil seq raises" do
614 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
617 test "#mask_seq_soft! with nil qual raises" do
621 assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
624 test "#mask_seq_soft! with bad cutoff raises" do
625 assert_raise(SeqError) { @entry.mask_seq_soft!(-1) }
626 assert_raise(SeqError) { @entry.mask_seq_soft!(41) }
629 test "#mask_seq_soft! with OK cutoff dont raise" do
633 assert_nothing_raised { @entry.mask_seq_soft!(0) }
634 assert_nothing_raised { @entry.mask_seq_soft!(40) }
637 test "#mask_seq_soft! returns correctly" do
639 @entry.qual = "33456"
641 assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
644 # qual score detection
# qual_base33? is expected to be true for a quality string made of every
# character from '!' through ':' (the range in the commented-out regex below).
# NOTE(review): the qual assignments for the two false cases are not visible
# in this excerpt.
646 test "#qual_base33? returns correctly" do
647 # self.qual.match(/[!-:]/)
648 @entry.qual = '!"#$%&\'()*+,-./0123456789:'
649 assert_equal(true, @entry.qual_base33? )
651 assert_equal(false, @entry.qual_base33? )
653 assert_equal(false, @entry.qual_base33? )
# qual_base64? is expected to be true for a quality string made of every
# character from 'K' through 'h' (the range in the commented-out regex below)
# and false for 105.chr ("i"), the first character past that range.
# NOTE(review): the qual assignment for the first false case is not visible
# in this excerpt.
656 test "#qual_base64? returns correctly" do
657 # self.qual.match(/[K-h]/)
658 @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
659 assert_equal(true, @entry.qual_base64? )
661 assert_equal(false, @entry.qual_base64? )
662 @entry.qual = 105.chr
663 assert_equal(false, @entry.qual_base64? )
# qual_valid?(encoding) is expected to raise SeqError when qual is nil and
# when the encoding argument is not recognised ("foobar"). For :base_33 and
# :base_64 it returns true when every quality character lies between
# Seq::SCORE_MIN + offset and Seq::SCORE_MAX + offset (offset 33 or 64), and
# false as soon as the string reaches one character below or above that range.
# NOTE(review): the qual assignment for the "bad encoding" case is not
# visible in this excerpt.
666 test "#qual_valid? with nil qual raises" do
667 assert_raise(SeqError) { @entry.qual_valid?(:base_33) }
668 assert_raise(SeqError) { @entry.qual_valid?(:base_64) }
671 test "#qual_valid? with bad encoding raises" do
673 assert_raise(SeqError) { @entry.qual_valid?("foobar") }
676 test "#qual_valid? with OK range returns correctly" do
677 @entry.qual = ((Seq::SCORE_MIN + 33).chr .. (Seq::SCORE_MAX + 33).chr).to_a.join
678 assert_equal(true, @entry.qual_valid?(:base_33))
679 @entry.qual = ((Seq::SCORE_MIN + 64).chr .. (Seq::SCORE_MAX + 64).chr).to_a.join
680 assert_equal(true, @entry.qual_valid?(:base_64))
683 test "#qual_valid? with bad range returns correctly" do
684 @entry.qual = ((Seq::SCORE_MIN + 33 - 1).chr .. (Seq::SCORE_MAX + 33).chr).to_a.join
685 assert_equal(false, @entry.qual_valid?(:base_33))
686 @entry.qual = ((Seq::SCORE_MIN + 33).chr .. (Seq::SCORE_MAX + 33 + 1).chr).to_a.join
687 assert_equal(false, @entry.qual_valid?(:base_33))
689 @entry.qual = ((Seq::SCORE_MIN + 64 - 1).chr .. (Seq::SCORE_MAX + 64).chr).to_a.join
690 assert_equal(false, @entry.qual_valid?(:base_64))
691 @entry.qual = ((Seq::SCORE_MIN + 64).chr .. (Seq::SCORE_MAX + 64 + 1).chr).to_a.join
692 assert_equal(false, @entry.qual_valid?(:base_64))
695 # qual score conversion between base 33 and base 64 encodings
# qual_convert!(from, to) is expected to return an object whose qual has been
# re-encoded. Converting to the same encoding leaves the string unchanged;
# :base_33 -> :base_64 shifts every character up by 31 ('B' -> 'a'), and
# :base_64 -> :base_33 shifts every character down by 31 ('a' -> 'B').
697 test "#qual_convert! from base33 to base33 returns OK" do
698 @entry.qual = 'BCDEFGHI'
699 assert_equal('BCDEFGHI', @entry.qual_convert!(:base_33, :base_33).qual)
702 test "#qual_convert! from base33 to base64 returns OK" do
703 @entry.qual = 'BCDEFGHI'
704 assert_equal('abcdefgh', @entry.qual_convert!(:base_33, :base_64).qual)
707 test "#qual_convert! from base64 to base64 returns OK" do
708 @entry.qual = 'BCDEFGHI'
709 assert_equal('BCDEFGHI', @entry.qual_convert!(:base_64, :base_64).qual)
712 test "#qual_convert! from base64 to base33 returns OK" do
713 @entry.qual = 'abcdefgh'
714 assert_equal('BCDEFGHI', @entry.qual_convert!(:base_64, :base_33).qual)
# qual_coerce!(encoding) is fed every character from '!' to '~'. For :base_33
# every character above 'I' is expected to become 'I'. For :base_64 every
# character above 'h' is expected to become 'h'.
# NOTE(review): the :base_64 expectation also turns '[' and ']' into 'h'
# while keeping the backslash between them, and leaves the characters below
# '@' untouched. That looks like a character-set quirk of the implementation
# rather than a clamp to the valid range — confirm the intent.
717 test "#qual_coerce! returns correctly" do
718 @entry.qual = ('!' .. '~').to_a.join
719 assert_equal("!\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", @entry.qual_coerce!(:base_33).qual)
720 @entry.qual = ('!' .. '~').to_a.join
721 assert_equal("!\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZh\\h^_`abcdefghhhhhhhhhhhhhhhhhhhhhhh", @entry.qual_coerce!(:base_64).qual)
# scores_mean is expected to raise SeqError when qual is missing and to
# return the float 20.0 in the success case.
# NOTE(review): the fixture assignments for both cases are not visible in
# this excerpt.
724 test "#scores_mean without qual raises" do
726 assert_raise(SeqError) { @entry.scores_mean }
729 test "#scores_mean returns correctly" do
731 assert_equal(20.0, @entry.scores_mean)