git.donarmstrong.com Git - biopieces.git/blob - code_ruby/test/maasha/test_seq.rb
fixed unit tests for reverse and revcomp in seq.rb
[biopieces.git] / code_ruby / test / maasha / test_seq.rb
1 #!/usr/bin/env ruby
2
3 require 'maasha/seq'
4 require 'test/unit'
5 require 'pp'
6
# Unit tests for the Seq class loaded from 'maasha/seq'.
#
# Most tests operate on @entry, a blank Seq instance created afresh in #setup
# before every test. A few tests build their own objects with
# Seq.new(name, seq) or Seq.new_bp(record) instead.
#
# Boolean predicates are checked with assert_equal(true/false, ...) throughout
# so that a failure reports the value that was actually returned.
class TestSeq < Test::Unit::TestCase
  # Builds the blank Seq instance used by most tests.
  def setup
    @entry = Seq.new
  end

  # Disabled test, kept for reference. It was already commented out; the
  # reason is not recorded here -- presumably Seq does not strip whitespace
  # from assigned sequences (verify before re-enabling).
  #
  #  def test_Seq_autoremoves_whitespace_newlines_and_carriage_returns
  #    dna = Seq.new
  #    dna.seq = "A\tT\r\tC\nG  "
  #    assert_equal("ATCG", dna.seq)
  #  end

  # --- Construction -------------------------------------------------------

  # Seq.new_bp maps the record keys :SEQ_NAME, :SEQ, :SEQ_TYPE and :SCORES
  # onto the seq_name, seq, type and qual attributes respectively.
  def test_Seq_new_bp_returns_correctly
    record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"}
    seq    = Seq.new_bp(record)
    assert_equal("test", seq.seq_name)
    assert_equal("ATCG", seq.seq)
    assert_equal("dna",  seq.type)
    assert_equal("hhhh", seq.qual)
  end

  # --- Type predicates ----------------------------------------------------

  # With no type set, is_dna? is expected to be exactly false (not nil).
  def test_Seq_is_dna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_dna?)
  end

  def test_Seq_is_dna_with_dna_sequence_type_returns_true
    @entry.type = 'dna'
    assert_equal(true, @entry.is_dna?)
  end

  # With no type set, is_rna? is expected to be exactly false (not nil).
  def test_Seq_is_rna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_rna?)
  end

  def test_Seq_is_rna_with_rna_sequence_type_returns_true
    @entry.type = 'rna'
    assert_equal(true, @entry.is_rna?)
  end

  # With no type set, is_protein? is expected to be exactly false (not nil).
  def test_Seq_is_protein_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_protein?)
  end

  def test_Seq_is_protein_with_protein_sequence_type_returns_true
    @entry.type = 'protein'
    assert_equal(true, @entry.is_protein?)
  end

  # --- Type guessing (non-mutating) ---------------------------------------

  # type_guess must raise SeqError when no sequence has been assigned.
  def test_Seq_type_guess_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess }
  end

  # The input differs from the rna case below only in the 'F' residue.
  def test_Seq_type_guess_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    assert_equal('protein', @entry.type_guess)
  end

  # The input differs from the protein case above only in the 'U' residue.
  def test_Seq_type_guess_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    assert_equal('rna', @entry.type_guess)
  end

  def test_Seq_type_guess_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    assert_equal('dna', @entry.type_guess)
  end

  # --- Type guessing (type_guess!, checked through the type attribute) ----

  # type_guess! must raise SeqError when no sequence has been assigned.
  def test_Seq_type_guess_EM_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess! }
  end

  def test_Seq_type_guess_EM_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    @entry.type_guess!
    assert_equal('protein', @entry.type)
  end

  def test_Seq_type_guess_EM_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    @entry.type_guess!
    assert_equal('rna', @entry.type)
  end

  def test_Seq_type_guess_EM_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    @entry.type_guess!
    assert_equal('dna', @entry.type)
  end

  # --- Length and indels --------------------------------------------------

  def test_Seq_length_is_correct
    @entry.seq = 'ATCG'
    assert_equal(4, @entry.length)
  end

  # The four trailing characters '.', '-', '~' and '_' are the ones expected
  # to be counted as indels.
  def test_Seq_indels_is_correct
    @entry.seq = 'ATCG.-~_'
    assert_equal(4, @entry.indels)
  end

  # --- DNA -> RNA ---------------------------------------------------------

  # A type is set but no sequence: to_rna must raise SeqError.
  def test_Seq_to_rna_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  # The entry is already typed 'rna': to_rna must raise SeqError.
  def test_Seq_to_rna_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  # T/t become U/u; the case of every residue is preserved.
  def test_Seq_to_rna_transcribes_correctly
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("AUCGaucg", @entry.to_rna)
  end

  # to_rna also updates the type attribute of the entry it was called on.
  def test_Seq_to_rna_changes_entry_type_to_rna
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    @entry.to_rna
    assert_equal("rna", @entry.type)
  end

  # --- RNA -> DNA ---------------------------------------------------------

  # A type is set but no sequence: to_dna must raise SeqError.
  def test_Seq_to_dna_raises_if_no_sequence
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  # The entry is already typed 'dna': to_dna must raise SeqError.
  def test_Seq_to_dna_raises_on_bad_type
    @entry.seq  = 'AUCG'
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  # U/u become T/t; the case of every residue is preserved.
  def test_Seq_to_dna_transcribes_correctly
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("ATCGatcg", @entry.to_dna)
  end

  # to_dna also updates the type attribute of the entry it was called on.
  def test_Seq_to_dna_changes_entry_type_to_dna
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    @entry.to_dna
    assert_equal("dna", @entry.type)
  end

  # --- Output formats -----------------------------------------------------

  # to_bp returns a hash with :SEQ_NAME, :SEQ and the derived :SEQ_LEN.
  def test_Seq_to_bp_returns_correct_record
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
  end

  def test_Seq_to_bp_raises_on_missing_seq_name
    @entry.seq = 'ATCG'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_bp_raises_on_missing_sequence
    @entry.seq_name = 'test'
    assert_raise(SeqError) { @entry.to_bp }
  end

  # Without an argument the whole sequence is emitted on a single line.
  def test_Seq_to_fasta_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal(">test\nATCG\n", @entry.to_fasta)
  end

  # With an argument of 2 the sequence is broken into lines of two residues.
  def test_Seq_to_fasta_wraps_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
  end

  # FASTQ layout: "@name", sequence, "+", quality string -- one per line.
  def test_Seq_to_fastq_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    @entry.qual     = 'hhhh'
    assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
  end

  # A sequence containing 'U' is rejected by to_key.
  def test_Seq_to_key_with_bad_residue_raises
    entry = Seq.new("test", "AUCG")
    assert_raise(SeqError) { entry.to_key }
  end

  # "ATCG" is expected to map to the integer key 54.
  def test_Seq_to_key_returns_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(54, entry.to_key)
  end

  # --- Reverse / complement -----------------------------------------------

  # reverse returns an object whose seq is the reversed residue string.
  def test_Seq_reverse_returns_correctly
    @entry.seq = "ATCG"
    assert_equal("GCTA", @entry.reverse.seq)
  end

  # A type is set but no sequence: complement must raise SeqError.
  def test_Seq_complement_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.complement }
  end

  # complement must raise SeqError for an entry typed 'protein'.
  def test_Seq_complement_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'protein'
    assert_raise(SeqError) { @entry.complement }
  end

  # Note: here the result of complement is compared directly with a String,
  # whereas reverse / reverse_complement are checked through `.seq`.
  def test_Seq_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("TAGCtagc", @entry.complement)
  end

  def test_Seq_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("UAGCuagc", @entry.complement)
  end

  def test_Seq_reverse_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("cgatCGAT", @entry.reverse_complement.seq)
  end

  def test_Seq_reverse_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("cgauCGAU", @entry.reverse_complement.seq)
  end

  # --- Hamming distance ---------------------------------------------------

  # "ATCG" vs "atgg": only the third position (C vs g) is expected to count,
  # so the upper/lower case differences must not be treated as mismatches.
  def test_Seq_hamming_distance_returns_correctly
    seq1 = Seq.new("test1", "ATCG")
    seq2 = Seq.new("test2", "atgg")
    assert_equal(1, seq1.hamming_distance(seq2))
  end

  # --- Sequence generation ------------------------------------------------

  # Both a negative and a zero length must be rejected.
  def test_Seq_generate_with_length_lt_1_raises
    assert_raise(SeqError) { @entry.generate(-10, "dna") }
    assert_raise(SeqError) { @entry.generate(0, "dna") }
  end

  def test_Seq_generate_with_bad_type_raises
    assert_raise(SeqError) { @entry.generate(10, "foo") }
  end

  # Type names are accepted in lower, upper and capitalised form.
  def test_Seq_generate_with_ok_type_dont_raise
    %w[dna DNA rna RNA protein Protein].each do |type|
      assert_nothing_raised { @entry.generate(10, type) }
    end
  end

  # --- subseq (returns a new object) --------------------------------------

  def test_Seq_subseq_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(-1, 1) }
  end

  def test_Seq_subseq_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 0) }
  end

  # Requesting 5 residues from a 4-residue sequence must raise.
  def test_Seq_subseq_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 5) }
  end

  # Arguments are (start, length) with a zero-based start.
  def test_Seq_subseq_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("AT", @entry.subseq(0, 2).seq)
    assert_equal("CG", @entry.subseq(2, 2).seq)
  end

  # With the length omitted, everything from start to the end is returned.
  def test_Seq_subseq_without_len_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("ATCG", @entry.subseq(0).seq)
    assert_equal("CG",   @entry.subseq(2).seq)
  end

  # The quality string is sliced with the same coordinates as the sequence.
  def test_Seq_subseq_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("ab", @entry.subseq(0, 2).qual)
    assert_equal("cd", @entry.subseq(2, 2).qual)
  end

  def test_Seq_subseq_without_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("abcd", @entry.subseq(0).qual)
    assert_equal("cd",   @entry.subseq(2).qual)
  end

  # --- subseq! (modifies the receiver) ------------------------------------

  def test_Seq_subseq_bang_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(-1, 1) }
  end

  def test_Seq_subseq_bang_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 0) }
  end

  def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 5) }
  end

  # The sequence is re-assigned between the two cases because subseq!
  # changes @entry in place.
  def test_Seq_subseq_bang_returns_correct_sequence
    @entry.seq  = "ATCG"
    @entry.subseq!(0, 2)
    assert_equal("AT", @entry.seq)
    @entry.seq  = "ATCG"
    @entry.subseq!(2, 2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_without_len_returns_correct_sequence
    @entry.seq  = "ATCG"
    @entry.subseq!(0)
    assert_equal("ATCG", @entry.seq)
    @entry.seq  = "ATCG"
    @entry.subseq!(2)
    assert_equal("CG", @entry.seq)
  end

  # Both seq and qual are reset between the two cases because subseq!
  # changes @entry in place.
  def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0, 2)
    assert_equal("ab", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2, 2)
    assert_equal("cd", @entry.qual)
  end

  def test_Seq_subseq_bang_with_pos_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0)
    assert_equal("abcd", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2)
    assert_equal("cd", @entry.qual)
  end

  # --- Random subsequence -------------------------------------------------

  # Asking for the full length leaves only one possible result, which keeps
  # the test deterministic.
  def test_Seq_subseq_rand_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("ATCG", @entry.subseq_rand(4).seq)
  end

  # --- Composition --------------------------------------------------------

  # Residue counts are looked up by upper-case letter; a residue that does
  # not occur ("X") is expected to yield 0 rather than nil.
  def test_Seq_composition_returns_correctly
    @entry.seq = "AAAATTTCCG"
    assert_equal(4, @entry.composition["A"])
    assert_equal(3, @entry.composition["T"])
    assert_equal(2, @entry.composition["C"])
    assert_equal(1, @entry.composition["G"])
    assert_equal(0, @entry.composition["X"])
  end

  # --- Homopolymers -------------------------------------------------------

  def test_Seq_homopol_max_returns_0_with_empty_sequence
    @entry.seq = ""
    assert_equal(0, @entry.homopol_max)
  end

  def test_Seq_homopol_max_returns_0_with_nil_sequence
    @entry.seq = nil
    assert_equal(0, @entry.homopol_max)
  end

  # The longest run in the input is five residues ("nnNnn"), so asking with
  # an argument of 6 is expected to give 0. The argument is presumably a
  # minimum run length -- confirm against Seq#homopol_max.
  def test_Seq_homopol_max_returns_0_when_not_found
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(0, @entry.homopol_max(6))
  end

  # The expected 5 matches the mixed-case run "nnNnn", so runs are expected
  # to be detected regardless of letter case.
  def test_Seq_homopol_max_returns_correctly
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(5, @entry.homopol_max(3))
  end

  # --- Masking percentages ------------------------------------------------

  # Expected 33.33 is consistent with 2 masked residues ("N", "n") out of the
  # 6 non-gap residues. Exact Float comparison is kept from the original
  # test; it presumably relies on the method rounding to two decimals --
  # confirm against Seq#hard_mask.
  def test_Seq_hard_mask_returns_correctly
    @entry.seq = "--AAAANn"
    assert_equal(33.33, @entry.hard_mask)
  end

  # Expected 25.00 is consistent with 1 lower-case residue out of the 4
  # non-gap residues. Exact Float comparison is kept from the original test.
  def test_Seq_soft_mask_returns_correctly
    @entry.seq = "--AAAa"
    assert_equal(25.00, @entry.soft_mask)
  end
end
398
399
400 __END__