git.donarmstrong.com Git - biopieces.git/blob - code_ruby/test/maasha/test_seq.rb
added quality_trim to seq.rb along with unit tests
[biopieces.git] / code_ruby / test / maasha / test_seq.rb
1 #!/usr/bin/env ruby
2
3 require 'maasha/seq'
4 require 'test/unit'
5 require 'pp'
6
# Unit tests for the Seq class loaded from 'maasha/seq'.
#
# Most tests operate on @entry, a Seq created without arguments in #setup;
# a few build their own entry with Seq.new(name, seq) or Seq.new_bp(record).
# Failures signalled by Seq are expected to surface as SeqError.
class TestSeq < Test::Unit::TestCase
  # Runs before every test: provides a fresh Seq with nothing set on it.
  def setup
    @entry = Seq.new
  end

  #  def test_Seq# autoremoves whitespace, newlines, and carriage returns
  #    dna = Seq.new
  #    dna.seq = "A\tT\r\tC\nG  "
  #    assert_equal(dna.seq, "ATCG")
  #  end

  # --- Seq.new_bp --------------------------------------------------------

  # A record hash with :SEQ_NAME, :SEQ, :SEQ_TYPE and :SCORES keys must map
  # onto seq_name, seq, type and qual respectively.
  def test_Seq_new_bp_returns_correctly
    record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"}
    seq    = Seq.new_bp(record)
    assert_equal("test", seq.seq_name)
    assert_equal("ATCG", seq.seq)
    assert_equal("dna",  seq.type)
    assert_equal("hhhh", seq.qual)
  end

  # --- type predicates ---------------------------------------------------
  # assert_equal is used (rather than assert(x == bool)) so that a failure
  # reports the value actually returned by the predicate.

  def test_Seq_is_dna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_dna?)
  end

  def test_Seq_is_dna_with_dna_sequence_type_returns_true
    @entry.type = 'dna'
    assert_equal(true, @entry.is_dna?)
  end

  def test_Seq_is_rna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_rna?)
  end

  def test_Seq_is_rna_with_rna_sequence_type_returns_true
    @entry.type = 'rna'
    assert_equal(true, @entry.is_rna?)
  end

  def test_Seq_is_protein_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_protein?)
  end

  def test_Seq_is_protein_with_protein_sequence_type_returns_true
    @entry.type = 'protein'
    assert_equal(true, @entry.is_protein?)
  end

  # --- type_guess (returns the guessed type) -----------------------------

  def test_Seq_type_guess_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess }
  end

  def test_Seq_type_guess_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    assert_equal('protein', @entry.type_guess)
  end

  def test_Seq_type_guess_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    assert_equal('rna', @entry.type_guess)
  end

  def test_Seq_type_guess_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    assert_equal('dna', @entry.type_guess)
  end

  # --- type_guess! (stores the guessed type on the entry) ----------------

  def test_Seq_type_guess_EM_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess! }
  end

  def test_Seq_type_guess_EM_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    @entry.type_guess!
    assert_equal('protein', @entry.type)
  end

  def test_Seq_type_guess_EM_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    @entry.type_guess!
    assert_equal('rna', @entry.type)
  end

  def test_Seq_type_guess_EM_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    @entry.type_guess!
    assert_equal('dna', @entry.type)
  end

  # --- length / indels ---------------------------------------------------

  def test_Seq_length_is_correct
    @entry.seq = 'ATCG'
    assert_equal(4, @entry.length)
  end

  # The four trailing characters '.', '-', '~' and '_' are the ones expected
  # to be counted as indels.
  def test_Seq_indels_is_correct
    @entry.seq = 'ATCG.-~_'
    assert_equal(4, @entry.indels)
  end

  # --- to_rna ------------------------------------------------------------

  def test_Seq_to_rna_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  def test_Seq_to_rna_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  def test_Seq_to_rna_transcribes_correctly
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("AUCGaucg", @entry.to_rna)
  end

  def test_Seq_to_rna_changes_entry_type_to_rna
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    @entry.to_rna
    assert_equal("rna", @entry.type)
  end

  # --- to_dna ------------------------------------------------------------

  def test_Seq_to_dna_raises_if_no_sequence
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  def test_Seq_to_dna_raises_on_bad_type
    @entry.seq  = 'AUCG'
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  def test_Seq_to_dna_transcribes_correctly
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("ATCGatcg", @entry.to_dna)
  end

  def test_Seq_to_dna_changes_entry_type_to_dna
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    @entry.to_dna
    assert_equal("dna", @entry.type)
  end

  # --- to_bp -------------------------------------------------------------

  def test_Seq_to_bp_returns_correct_record
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
  end

  def test_Seq_to_bp_raises_on_missing_seq_name
    @entry.seq = 'ATCG'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_bp_raises_on_missing_sequence
    @entry.seq_name = 'test'
    assert_raise(SeqError) { @entry.to_bp }
  end

  # --- to_fasta / to_fastq -----------------------------------------------

  def test_Seq_to_fasta_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal(">test\nATCG\n", @entry.to_fasta)
  end

  # The argument to to_fasta is the line width used for wrapping.
  def test_Seq_to_fasta_wraps_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
  end

  def test_Seq_to_fastq_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    @entry.qual     = 'hhhh'
    assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
  end

  # --- to_key ------------------------------------------------------------

  def test_Seq_to_key_with_bad_residue_raises
    entry = Seq.new("test", "AUCG")
    assert_raise(SeqError) { entry.to_key }
  end

  def test_Seq_to_key_returns_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(54, entry.to_key)
  end

  # --- reverse / complement ----------------------------------------------

  def test_Seq_reverse_returns_correctly
    @entry.seq = "ATCG"
    assert_equal("GCTA", @entry.reverse.seq)
  end

  def test_Seq_complement_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.complement }
  end

  def test_Seq_complement_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'protein'
    assert_raise(SeqError) { @entry.complement }
  end

  def test_Seq_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("TAGCtagc", @entry.complement)
  end

  def test_Seq_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("UAGCuagc", @entry.complement)
  end

  def test_Seq_reverse_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("cgatCGAT", @entry.reverse_complement.seq)
  end

  def test_Seq_reverse_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("cgauCGAU", @entry.reverse_complement.seq)
  end

  # --- hamming_distance --------------------------------------------------

  # "ATCG" vs "atgg" is expected to differ in one position only, i.e. the
  # comparison is expected to ignore case.
  def test_Seq_hamming_distance_returns_correctly
    seq1 = Seq.new("test1", "ATCG")
    seq2 = Seq.new("test2", "atgg")
    assert_equal(1, seq1.hamming_distance(seq2))
  end

  # --- generate ----------------------------------------------------------

  def test_Seq_generate_with_length_lt_1_raises
    assert_raise(SeqError) { @entry.generate(-10, "dna") }
    assert_raise(SeqError) { @entry.generate(0, "dna") }
  end

  def test_Seq_generate_with_bad_type_raises
    assert_raise(SeqError) { @entry.generate(10, "foo") }
  end

  # Type names are expected to be accepted regardless of letter case.
  def test_Seq_generate_with_ok_type_dont_raise
    %w[dna DNA rna RNA protein Protein].each do |type|
      assert_nothing_raised { @entry.generate(10, type) }
    end
  end

  # --- subseq (returns a new entry) --------------------------------------

  def test_Seq_subseq_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(-1, 1) }
  end

  def test_Seq_subseq_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 0) }
  end

  def test_Seq_subseq_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 5) }
  end

  def test_Seq_subseq_returns_correct_sequence
    @entry.seq = "ATCG"
    assert_equal("AT", @entry.subseq(0, 2).seq)
    assert_equal("CG", @entry.subseq(2, 2).seq)
  end

  # Without a length argument the subsequence runs to the end of the entry.
  def test_Seq_subseq_without_len_returns_correct_sequence
    @entry.seq = "ATCG"
    assert_equal("ATCG", @entry.subseq(0).seq)
    assert_equal("CG",   @entry.subseq(2).seq)
  end

  def test_Seq_subseq_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("ab", @entry.subseq(0, 2).qual)
    assert_equal("cd", @entry.subseq(2, 2).qual)
  end

  def test_Seq_subseq_without_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("abcd", @entry.subseq(0).qual)
    assert_equal("cd",   @entry.subseq(2).qual)
  end

  # --- subseq! (modifies the entry in place) -----------------------------
  # The entry is re-initialised before each second call because the first
  # subseq! has already shortened it.

  def test_Seq_subseq_bang_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(-1, 1) }
  end

  def test_Seq_subseq_bang_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 0) }
  end

  def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 5) }
  end

  def test_Seq_subseq_bang_returns_correct_sequence
    @entry.seq = "ATCG"
    @entry.subseq!(0, 2)
    assert_equal("AT", @entry.seq)
    @entry.seq = "ATCG"
    @entry.subseq!(2, 2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_without_len_returns_correct_sequence
    @entry.seq = "ATCG"
    @entry.subseq!(0)
    assert_equal("ATCG", @entry.seq)
    @entry.seq = "ATCG"
    @entry.subseq!(2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0, 2)
    assert_equal("ab", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2, 2)
    assert_equal("cd", @entry.qual)
  end

  def test_Seq_subseq_bang_with_pos_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0)
    assert_equal("abcd", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2)
    assert_equal("cd", @entry.qual)
  end

  # --- subseq_rand -------------------------------------------------------

  # Requesting the full length leaves only one possible subsequence, which
  # keeps the assertion deterministic.
  def test_Seq_subseq_rand_returns_correct_sequence
    @entry.seq = "ATCG"
    assert_equal("ATCG", @entry.subseq_rand(4).seq)
  end

  # --- quality_trim_right ------------------------------------------------

  def test_Seq_quality_trim_right_with_missing_seq_raises
    @entry.qual = "hhhh"
    assert_raise(SeqError) { @entry.quality_trim_right(20) }
  end

  def test_Seq_quality_trim_right_with_missing_qual_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.quality_trim_right(20) }
  end

  # -1 and 41 sit just outside the accepted minimum-score range.
  def test_Seq_quality_trim_right_with_bad_min_raises
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [-1, 41].each do |min|
      assert_raise(SeqError) { @entry.quality_trim_right(min) }
    end
  end

  # 0 and 40 are the boundary values that must still be accepted.
  def test_Seq_quality_trim_right_with_ok_min_dont_raise
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [0, 40].each do |min|
      assert_nothing_raised { @entry.quality_trim_right(min) }
    end
  end

  def test_Seq_quality_trim_right_returns_correctly
    @entry.seq  = "AAAAATCG"
    @entry.qual = "hhhhhgfe"
    @entry.quality_trim_right(38)
    assert_equal("AAAAAT", @entry.seq)
    assert_equal("hhhhhg", @entry.qual)
  end

  # --- quality_trim_left -------------------------------------------------

  def test_Seq_quality_trim_left_with_missing_seq_raises
    @entry.qual = "hhhh"
    assert_raise(SeqError) { @entry.quality_trim_left(20) }
  end

  def test_Seq_quality_trim_left_with_missing_qual_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.quality_trim_left(20) }
  end

  # -1 and 41 sit just outside the accepted minimum-score range.
  def test_Seq_quality_trim_left_with_bad_min_raises
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [-1, 41].each do |min|
      assert_raise(SeqError) { @entry.quality_trim_left(min) }
    end
  end

  # 0 and 40 are the boundary values that must still be accepted.
  def test_Seq_quality_trim_left_with_ok_min_dont_raise
    @entry.seq  = "ATCG"
    @entry.qual = "hhhh"

    [0, 40].each do |min|
      assert_nothing_raised { @entry.quality_trim_left(min) }
    end
  end

  def test_Seq_quality_trim_left_returns_correctly
    @entry.seq  = "GCTAAAAA"
    @entry.qual = "efghhhhh"
    @entry.quality_trim_left(38)
    assert_equal("TAAAAA", @entry.seq)
    assert_equal("ghhhhh", @entry.qual)
  end

  # --- quality_trim (both ends) ------------------------------------------

  def test_Seq_quality_trim_returns_correctly
    @entry.seq  = "GCTAAAAAGTG"
    @entry.qual = "efghhhhhgfe"
    @entry.quality_trim(38)
    assert_equal("TAAAAAG", @entry.seq)
    assert_equal("ghhhhhg", @entry.qual)
  end

  # --- composition -------------------------------------------------------

  # A residue that does not occur ("X") must be reported as 0.
  def test_Seq_composition_returns_correctly
    @entry.seq = "AAAATTTCCG"
    assert_equal(4, @entry.composition["A"])
    assert_equal(3, @entry.composition["T"])
    assert_equal(2, @entry.composition["C"])
    assert_equal(1, @entry.composition["G"])
    assert_equal(0, @entry.composition["X"])
  end

  # --- homopol_max -------------------------------------------------------

  def test_Seq_homopol_max_returns_0_with_empty_sequence
    @entry.seq = ""
    assert_equal(0, @entry.homopol_max)
  end

  def test_Seq_homopol_max_returns_0_with_nil_sequence
    @entry.seq = nil
    assert_equal(0, @entry.homopol_max)
  end

  # The longest run in the sequence is 5 ("nnNnn"), so a minimum of 6 finds
  # nothing.
  def test_Seq_homopol_max_returns_0_when_not_found
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(0, @entry.homopol_max(6))
  end

  def test_Seq_homopol_max_returns_correctly
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(5, @entry.homopol_max(3))
  end

  # --- hard_mask / soft_mask ---------------------------------------------
  # NOTE(review): both expectations are exact Float comparisons; they only
  # hold if the methods round to two decimals - confirm against seq.rb.

  def test_Seq_hard_mask_returns_correctly
    @entry.seq = "--AAAANn"
    assert_equal(33.33, @entry.hard_mask)
  end

  def test_Seq_soft_mask_returns_correctly
    @entry.seq = "--AAAa"
    assert_equal(25.00, @entry.soft_mask)
  end
end
478
479
480 __END__