]> git.donarmstrong.com Git - biopieces.git/blob - code_ruby/test/maasha/test_seq.rb
rewrote mask_seq_hard and mask_seq_soft methods to NArrays for speed
[biopieces.git] / code_ruby / test / maasha / test_seq.rb
1 #!/usr/bin/env ruby
2
3 require 'maasha/seq'
4 require 'test/unit'
5 require 'pp'
6
# Unit tests for the Seq class loaded from 'maasha/seq'.
#
# Unless a test builds its own instance with Seq.new(name, sequence), it works
# on @entry, a Seq constructed with no arguments in #setup.
#
# NOTE(review): the quality strings used below ("hhhh", "RSTU", ...) together
# with the accepted 0..40 score range look like a base-64 quality offset
# ('h' = 40) -- confirm against the Seq implementation.
class TestSeq < Test::Unit::TestCase
  # Runs before every test: start from a Seq built with no arguments.
  def setup
    @entry = Seq.new
  end

  #  def test_Seq# autoremoves whitespace, newlines, and carriage returns
  #    dna = Seq.new
  #    dna.seq = "A\tT\r\tC\nG  "
  #    assert_equal(dna.seq, "ATCG")
  #  end

  # --- Construction from a record hash -------------------------------------

  # Seq.new_bp maps :SEQ_NAME, :SEQ, :SEQ_TYPE and :SCORES onto
  # seq_name, seq, type and qual respectively.
  def test_Seq_new_bp_returns_correctly
    record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"}
    seq    = Seq.new_bp(record)
    assert_equal("test", seq.seq_name)
    assert_equal("ATCG", seq.seq)
    assert_equal("dna",  seq.type)
    assert_equal("hhhh", seq.qual)
  end

  # --- Type predicates ------------------------------------------------------
  # assert_equal is used (rather than assert(x == bool)) so that a failure
  # reports the value the predicate actually returned.

  def test_Seq_is_dna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_dna?)
  end

  def test_Seq_is_dna_with_dna_sequence_type_returns_true
    @entry.type = 'dna'
    assert_equal(true, @entry.is_dna?)
  end

  def test_Seq_is_rna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_rna?)
  end

  def test_Seq_is_rna_with_rna_sequence_type_returns_true
    @entry.type = 'rna'
    assert_equal(true, @entry.is_rna?)
  end

  def test_Seq_is_protein_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_protein?)
  end

  def test_Seq_is_protein_with_protein_sequence_type_returns_true
    @entry.type = 'protein'
    assert_equal(true, @entry.is_protein?)
  end

  # --- Type guessing --------------------------------------------------------
  # type_guess returns the guessed type; type_guess! ("EM" in the test names)
  # stores the guess in the entry's type.

  def test_Seq_type_guess_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess }
  end

  def test_Seq_type_guess_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    assert_equal('protein', @entry.type_guess)
  end

  def test_Seq_type_guess_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    assert_equal('rna', @entry.type_guess)
  end

  def test_Seq_type_guess_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    assert_equal('dna', @entry.type_guess)
  end

  def test_Seq_type_guess_EM_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess! }
  end

  def test_Seq_type_guess_EM_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    @entry.type_guess!
    assert_equal('protein', @entry.type)
  end

  def test_Seq_type_guess_EM_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    @entry.type_guess!
    assert_equal('rna', @entry.type)
  end

  def test_Seq_type_guess_EM_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    @entry.type_guess!
    assert_equal('dna', @entry.type)
  end

  # --- Length and indel counting -------------------------------------------

  def test_Seq_length_is_correct
    @entry.seq = 'ATCG'
    assert_equal(4, @entry.length)
  end

  # The four characters '.', '-', '~' and '_' are each counted as an indel.
  def test_Seq_indels_is_correct
    @entry.seq = 'ATCG.-~_'
    assert_equal(4, @entry.indels)
  end

  # --- DNA <-> RNA conversion ----------------------------------------------

  def test_Seq_to_rna_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  # Converting to RNA is rejected when the entry is already typed as RNA.
  def test_Seq_to_rna_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  # T/t become U/u; letter case is preserved.
  def test_Seq_to_rna_transcribes_correctly
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("AUCGaucg", @entry.to_rna)
  end

  def test_Seq_to_rna_changes_entry_type_to_rna
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    @entry.to_rna
    assert_equal("rna", @entry.type)
  end

  def test_Seq_to_dna_raises_if_no_sequence
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  # Converting to DNA is rejected when the entry is already typed as DNA.
  def test_Seq_to_dna_raises_on_bad_type
    @entry.seq  = 'AUCG'
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  # U/u become T/t; letter case is preserved.
  def test_Seq_to_dna_transcribes_correctly
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("ATCGatcg", @entry.to_dna)
  end

  def test_Seq_to_dna_changes_entry_type_to_dna
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    @entry.to_dna
    assert_equal("dna", @entry.type)
  end

  # --- Export: record hash, FASTA, FASTQ, integer key ----------------------

  def test_Seq_to_bp_returns_correct_record
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
  end

  def test_Seq_to_bp_raises_on_missing_seq_name
    @entry.seq = 'ATCG'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_bp_raises_on_missing_sequence
    @entry.seq_name = 'test'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_fasta_raises_on_missing_seq_name
    @entry.seq = 'ATCG'
    assert_raise(SeqError) { @entry.to_fasta }
  end

  def test_Seq_to_fasta_raises_on_empty_seq_name
    @entry.seq_name = ''
    @entry.seq      = 'ATCG'
    assert_raise(SeqError) { @entry.to_fasta }
  end

  def test_Seq_to_fasta_raises_on_missing_seq
    @entry.seq_name = 'test'
    assert_raise(SeqError) { @entry.to_fasta }
  end

  def test_Seq_to_fasta_raises_on_empty_seq
    @entry.seq_name = 'test'
    @entry.seq      = ''
    assert_raise(SeqError) { @entry.to_fasta }
  end

  def test_Seq_to_fasta_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal(">test\nATCG\n", @entry.to_fasta)
  end

  # The optional argument to to_fasta is the line width used for wrapping.
  def test_Seq_to_fasta_wraps_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
  end

  def test_Seq_to_fastq_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    @entry.qual     = 'hhhh'
    assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
  end

  # 'U' is not accepted by to_key.
  def test_Seq_to_key_with_bad_residue_raises
    entry = Seq.new("test", "AUCG")
    assert_raise(SeqError) { entry.to_key }
  end

  # NOTE(review): 54 matches a 2-bit packing with A=0, T=1, C=2, G=3
  # (0b00_11_01_10) -- presumably that is the encoding; confirm in Seq#to_key.
  def test_Seq_to_key_returns_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(54, entry.to_key)
  end

  # --- Reverse / complement -------------------------------------------------

  def test_Seq_reverse_returns_correctly
    @entry.seq = "ATCG"
    assert_equal("GCTA", @entry.reverse.seq)
  end

  def test_Seq_complement_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.complement }
  end

  # Complementing is rejected for protein entries.
  def test_Seq_complement_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'protein'
    assert_raise(SeqError) { @entry.complement }
  end

  def test_Seq_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("TAGCtagc", @entry.complement)
  end

  def test_Seq_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("UAGCuagc", @entry.complement)
  end

  def test_Seq_reverse_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("cgatCGAT", @entry.reverse_complement.seq)
  end

  def test_Seq_reverse_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("cgauCGAU", @entry.reverse_complement.seq)
  end

  # --- Hamming distance -----------------------------------------------------

  # Only position 3 (C vs g) differs once case is disregarded, so the expected
  # distance of 1 implies a case-insensitive comparison.
  def test_Seq_hamming_distance_returns_correctly
    seq1 = Seq.new("test1", "ATCG")
    seq2 = Seq.new("test2", "atgg")
    assert_equal(1, seq1.hamming_distance(seq2))
  end

  # --- Random sequence generation ------------------------------------------

  def test_Seq_generate_with_length_lt_1_raises
    assert_raise(SeqError) { @entry.generate(-10, "dna") }
    assert_raise(SeqError) { @entry.generate(0, "dna") }
  end

  def test_Seq_generate_with_bad_type_raises
    assert_raise(SeqError) { @entry.generate(10, "foo") }
  end

  # Type names are accepted in lower, upper and capitalised form.
  def test_Seq_generate_with_ok_type_dont_raise
    %w[dna DNA rna RNA protein Protein].each do |type|
      assert_nothing_raised { @entry.generate(10, type) }
    end
  end

  # --- subseq (returns a new Seq) ------------------------------------------
  # subseq(start, length) uses a 0-based start; length is optional.

  def test_Seq_subseq_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(-1, 1) }
  end

  def test_Seq_subseq_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 0) }
  end

  def test_Seq_subseq_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 5) }
  end

  def test_Seq_subseq_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("AT", @entry.subseq(0, 2).seq)
    assert_equal("CG", @entry.subseq(2, 2).seq)
  end

  # Without a length the subsequence runs to the end of the sequence.
  def test_Seq_subseq_without_len_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("ATCG", @entry.subseq(0).seq)
    assert_equal("CG",   @entry.subseq(2).seq)
  end

  # Quality scores are sliced together with the sequence.
  def test_Seq_subseq_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("ab", @entry.subseq(0, 2).qual)
    assert_equal("cd", @entry.subseq(2, 2).qual)
  end

  def test_Seq_subseq_without_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("abcd", @entry.subseq(0).qual)
    assert_equal("cd",   @entry.subseq(2).qual)
  end

  # --- subseq! (modifies the entry in place) -------------------------------

  def test_Seq_subseq_bang_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(-1, 1) }
  end

  def test_Seq_subseq_bang_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 0) }
  end

  def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 5) }
  end

  # The entry is reset between the two calls because subseq! changes it.
  def test_Seq_subseq_bang_returns_correct_sequence
    @entry.seq  = "ATCG"
    @entry.subseq!(0, 2)
    assert_equal("AT", @entry.seq)
    @entry.seq  = "ATCG"
    @entry.subseq!(2, 2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_without_len_returns_correct_sequence
    @entry.seq  = "ATCG"
    @entry.subseq!(0)
    assert_equal("ATCG", @entry.seq)
    @entry.seq  = "ATCG"
    @entry.subseq!(2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0, 2)
    assert_equal("ab", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2, 2)
    assert_equal("cd", @entry.qual)
  end

  def test_Seq_subseq_bang_with_pos_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0)
    assert_equal("abcd", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2)
    assert_equal("cd", @entry.qual)
  end

  # Requesting the full length leaves only one possible subsequence, which
  # keeps this test deterministic.
  def test_Seq_subseq_rand_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("ATCG", @entry.subseq_rand(4).seq)
  end

  # --- Quality trimming -----------------------------------------------------
  # The minimum score must lie in 0..40; both seq and qual must be present.

  def test_Seq_quality_trim_right_with_missing_seq_raises
    @entry.qual = "hhhh"
    assert_raise(SeqError) { @entry.quality_trim_right(20) }
  end

  def test_Seq_quality_trim_right_with_missing_qual_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.quality_trim_right(20) }
  end

  def test_Seq_quality_trim_right_with_bad_min_raises
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [-1, 41].each do |min|
      assert_raise(SeqError) { @entry.quality_trim_right(min) }
    end
  end

  def test_Seq_quality_trim_right_with_ok_min_dont_raise
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [0, 40].each do |min|
      assert_nothing_raised { @entry.quality_trim_right(min) }
    end
  end

  # Trimming works in place: seq and qual lose the same trailing positions.
  def test_Seq_quality_trim_right_returns_correctly
    @entry.seq  = "AAAAATCG"
    @entry.qual = "hhhhhgfe"
    @entry.quality_trim_right(38)
    assert_equal("AAAAAT", @entry.seq)
    assert_equal("hhhhhg", @entry.qual)
  end

  def test_Seq_quality_trim_left_with_missing_seq_raises
    @entry.qual = "hhhh"
    assert_raise(SeqError) { @entry.quality_trim_left(20) }
  end

  def test_Seq_quality_trim_left_with_missing_qual_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.quality_trim_left(20) }
  end

  def test_Seq_quality_trim_left_with_bad_min_raises
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [-1, 41].each do |min|
      assert_raise(SeqError) { @entry.quality_trim_left(min) }
    end
  end

  def test_Seq_quality_trim_left_with_ok_min_dont_raise
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [0, 40].each do |min|
      assert_nothing_raised { @entry.quality_trim_left(min) }
    end
  end

  # Trimming works in place: seq and qual lose the same leading positions.
  def test_Seq_quality_trim_left_returns_correctly
    @entry.seq  = "GCTAAAAA"
    @entry.qual = "efghhhhh"
    @entry.quality_trim_left(38)
    assert_equal("TAAAAA", @entry.seq)
    assert_equal("ghhhhh", @entry.qual)
  end

  # quality_trim removes low-quality positions from both ends.
  def test_Seq_quality_trim_returns_correctly
    @entry.seq  = "GCTAAAAAGTG"
    @entry.qual = "efghhhhhgfe"
    @entry.quality_trim(38)
    assert_equal("TAAAAAG", @entry.seq)
    assert_equal("ghhhhhg", @entry.qual)
  end

  # --- Indel removal --------------------------------------------------------

  def test_Seq_indels_remove_without_qual_returns_correctly
    @entry.seq  = "A-T.CG~CG"
    @entry.qual = nil
    assert_equal("ATCGCG", @entry.indels_remove.seq)
  end

  # Quality characters at indel positions (the '@'s here) are dropped too.
  def test_Seq_indels_remove_with_qual_returns_correctly
    @entry.seq  = "A-T.CG~CG"
    @entry.qual = "a@b@cd@fg"
    assert_equal("ATCGCG", @entry.indels_remove.seq)
    assert_equal("abcdfg", @entry.indels_remove.qual)
  end

  # --- Composition and homopolymers ----------------------------------------

  # Residues that do not occur ("X") are reported with a count of 0.
  def test_Seq_composition_returns_correctly
    @entry.seq = "AAAATTTCCG"
    assert_equal(4, @entry.composition["A"])
    assert_equal(3, @entry.composition["T"])
    assert_equal(2, @entry.composition["C"])
    assert_equal(1, @entry.composition["G"])
    assert_equal(0, @entry.composition["X"])
  end

  def test_Seq_homopol_max_returns_0_with_empty_sequence
    @entry.seq = ""
    assert_equal(0, @entry.homopol_max)
  end

  def test_Seq_homopol_max_returns_0_with_nil_sequence
    @entry.seq = nil
    assert_equal(0, @entry.homopol_max)
  end

  # The longest run in the sequence is 5 ("nnNnn"), so asking for a minimum of
  # 6 finds nothing.
  def test_Seq_homopol_max_returns_0_when_not_found
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(0, @entry.homopol_max(6))
  end

  # Runs are matched regardless of case: "nnNnn" counts as length 5.
  def test_Seq_homopol_max_returns_correctly
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(5, @entry.homopol_max(3))
  end

  # --- Mask statistics ------------------------------------------------------
  # NOTE(review): these compare floats exactly; presumably hard_mask and
  # soft_mask round their percentage to two decimals -- confirm in Seq.

  # 2 of the 6 non-indel residues are N/n.
  def test_Seq_hard_mask_returns_correctly
    @entry.seq = "--AAAANn"
    assert_equal(33.33, @entry.hard_mask)
  end

  # 1 of the 4 non-indel residues is lower case.
  def test_Seq_soft_mask_returns_correctly
    @entry.seq = "--AAAa"
    assert_equal(25.00, @entry.soft_mask)
  end

  # --- Quality-based hard masking (mask_seq_hard!) -------------------------

  def test_Seq_mask_seq_hard_bang_with_nil_seq_raises
    @entry.seq  = nil
    @entry.qual = ""

    assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
  end

  def test_Seq_mask_seq_hard_bang_with_nil_qual_raises
    @entry.seq  = ""
    @entry.qual = nil

    assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
  end

  # seq and qual are set so that the SeqError can only come from the cutoff.
  # On a blank entry the nil-seq/nil-qual failure covered by the tests in this
  # file would raise SeqError for any cutoff, valid or not.
  def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises
    @entry.seq  = "ATCG"
    @entry.qual = "RSTU"

    assert_raise(SeqError) { @entry.mask_seq_hard!(-1) }
    assert_raise(SeqError) { @entry.mask_seq_hard!(41) }
  end

  def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise
    @entry.seq  = "ATCG"
    @entry.qual = "RSTU"

    assert_nothing_raised { @entry.mask_seq_hard!(0) }
    assert_nothing_raised { @entry.mask_seq_hard!(40) }
  end

  # Residues whose score is below the cutoff are replaced by 'N'.
  def test_Seq_mask_seq_hard_bang_returns_correctly
    @entry.seq  = "ATCG"
    @entry.qual = "RSTU"

    assert_equal("NNCG", @entry.mask_seq_hard!(20).seq)
  end

  # --- Quality-based soft masking (mask_seq_soft!) -------------------------

  def test_Seq_mask_seq_soft_bang_with_nil_seq_raises
    @entry.seq  = nil
    @entry.qual = ""

    assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
  end

  def test_Seq_mask_seq_soft_bang_with_nil_qual_raises
    @entry.seq  = ""
    @entry.qual = nil

    assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
  end

  # Residues whose score is below the cutoff are lower-cased.
  def test_Seq_mask_seq_soft_bang_returns_correctly
    @entry.seq  = "ATCG"
    @entry.qual = "RSTU"

    assert_equal("atCG", @entry.mask_seq_soft!(20).seq)
  end

  # seq and qual are set so that the SeqError can only come from the cutoff.
  # On a blank entry the nil-seq/nil-qual failure covered by the tests in this
  # file would raise SeqError for any cutoff, valid or not.
  def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises
    @entry.seq  = "ATCG"
    @entry.qual = "RSTU"

    assert_raise(SeqError) { @entry.mask_seq_soft!(-1) }
    assert_raise(SeqError) { @entry.mask_seq_soft!(41) }
  end

  def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise
    @entry.seq  = "ATCG"
    @entry.qual = "RSTU"

    assert_nothing_raised { @entry.mask_seq_soft!(0) }
    assert_nothing_raised { @entry.mask_seq_soft!(40) }
  end
end
581
582
583 __END__