]> git.donarmstrong.com Git - biopieces.git/blob - code_ruby/test/maasha/test_seq.rb
added hamming_distance method to seq.rb
[biopieces.git] / code_ruby / test / maasha / test_seq.rb
1 #!/usr/bin/env ruby
2
3 require 'maasha/seq'
4 require 'test/unit'
5 require 'pp'
6
# Unit tests for the Seq class loaded from 'maasha/seq'.
#
# Every test starts from @entry, a Seq constructed with no arguments (see
# #setup); tests that need a populated entry either assign to @entry's
# accessors or build their own Seq.new(seq_name, seq).
#
# Boolean predicates are checked with assert_equal(expected, actual) rather
# than assert(actual == expected) so that a failure reports the value that
# was actually returned.
class TestSeq < Test::Unit::TestCase
  # Builds a fresh Seq with no arguments before each test.
  def setup
    @entry = Seq.new
  end

  # Disabled test, kept as found in the original file.
  #  def test_Seq# autoremoves whitespace, newlines, and carriage returns
  #    dna = Seq.new
  #    dna.seq = "A\tT\r\tC\nG  "
  #    assert_equal(dna.seq, "ATCG")
  #  end

  # --- Sequence type predicates -------------------------------------------

  def test_Seq_is_dna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_dna?)
  end

  def test_Seq_is_dna_with_dna_sequence_type_returns_true
    @entry.type = 'dna'
    assert_equal(true, @entry.is_dna?)
  end

  def test_Seq_is_rna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_rna?)
  end

  def test_Seq_is_rna_with_rna_sequence_type_returns_true
    @entry.type = 'rna'
    assert_equal(true, @entry.is_rna?)
  end

  def test_Seq_is_protein_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_protein?)
  end

  def test_Seq_is_protein_with_protein_sequence_type_returns_true
    @entry.type = 'protein'
    assert_equal(true, @entry.is_protein?)
  end

  # --- type_guess: returns the guessed type --------------------------------

  def test_Seq_type_guess_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess }
  end

  # The only residue distinguishing this fixture from the DNA one is 'rF'.
  def test_Seq_type_guess_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    assert_equal('protein', @entry.type_guess)
  end

  # Same fixture as above with 'U' in place of 'F'.
  def test_Seq_type_guess_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    assert_equal('rna', @entry.type_guess)
  end

  def test_Seq_type_guess_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    assert_equal('dna', @entry.type_guess)
  end

  # --- type_guess!: stores the guessed type on the entry --------------------
  # ("EM" in the test names stands in for the '!' of the method name.)

  def test_Seq_type_guess_EM_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess! }
  end

  def test_Seq_type_guess_EM_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    @entry.type_guess!
    assert_equal('protein', @entry.type)
  end

  def test_Seq_type_guess_EM_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    @entry.type_guess!
    assert_equal('rna', @entry.type)
  end

  def test_Seq_type_guess_EM_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    @entry.type_guess!
    assert_equal('dna', @entry.type)
  end

  # --- length / indels -------------------------------------------------------

  def test_Seq_length_is_correct
    @entry.seq = 'ATCG'
    assert_equal(4, @entry.length)
  end

  # The fixture holds four residues followed by the four characters . - ~ _
  # and the expected indel count is 4.
  def test_Seq_indels_is_correct
    @entry.seq = 'ATCG.-~_'
    assert_equal(4, @entry.indels)
  end

  # --- to_rna ------------------------------------------------------------------

  def test_Seq_to_rna_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  # An entry already typed 'rna' must be rejected by to_rna.
  def test_Seq_to_rna_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  # Upper and lower case are both converted and keep their case.
  def test_Seq_to_rna_transcribes_correctly
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("AUCGaucg", @entry.to_rna)
  end

  def test_Seq_to_rna_changes_entry_type_to_rna
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    @entry.to_rna
    assert_equal("rna", @entry.type)
  end

  # --- to_dna ------------------------------------------------------------------

  def test_Seq_to_dna_raises_if_no_sequence
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  # An entry already typed 'dna' must be rejected by to_dna.
  def test_Seq_to_dna_raises_on_bad_type
    @entry.seq  = 'AUCG'
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  # Upper and lower case are both converted and keep their case.
  def test_Seq_to_dna_transcribes_correctly
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("ATCGatcg", @entry.to_dna)
  end

  def test_Seq_to_dna_changes_entry_type_to_dna
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    @entry.to_dna
    assert_equal("dna", @entry.type)
  end

  # --- to_bp: conversion to a record hash ---------------------------------------

  def test_Seq_to_bp_returns_correct_record
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
  end

  def test_Seq_to_bp_raises_on_missing_seq_name
    @entry.seq = 'ATCG'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_bp_raises_on_missing_sequence
    @entry.seq_name = 'test'
    assert_raise(SeqError) { @entry.to_bp }
  end

  # --- to_fasta -----------------------------------------------------------------

  def test_Seq_to_fasta_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal(">test\nATCG\n", @entry.to_fasta)
  end

  # The argument 2 yields sequence lines of two residues each.
  def test_Seq_to_fasta_wraps_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
  end

  # --- to_key -------------------------------------------------------------------

  # 'U' is the residue expected to be rejected here.
  def test_Seq_to_key_with_bad_residue_raises
    entry = Seq.new("test", "AUCG")
    assert_raise(SeqError) { entry.to_key }
  end

  # NOTE(review): 54 == 0b00110110, which would match a 2-bit-per-residue
  # packing with A=00, T=11, C=01, G=10 — confirm against Seq#to_key.
  def test_Seq_to_key_returns_correctly
    entry = Seq.new("test", "ATCG")
    assert_equal(54, entry.to_key)
  end

  # --- reverse / complement -----------------------------------------------------

  def test_Seq_reverse_returns_correctly
    @entry.seq = "ATCG"
    assert_equal("GCTA", @entry.reverse)
  end

  def test_Seq_complement_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.complement }
  end

  # A 'protein' entry must be rejected by complement.
  def test_Seq_complement_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'protein'
    assert_raise(SeqError) { @entry.complement }
  end

  def test_Seq_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("TAGCtagc", @entry.complement)
  end

  def test_Seq_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("UAGCuagc", @entry.complement)
  end

  def test_Seq_reverse_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("cgatCGAT", @entry.reverse_complement)
  end

  def test_Seq_reverse_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("cgauCGAU", @entry.reverse_complement)
  end

  # --- hamming_distance -----------------------------------------------------------

  # "ATCG" vs "atgg": only the third position differs once case is ignored,
  # so an expected distance of 1 implies a case-insensitive comparison.
  def test_Seq_hamming_distance_returns_correctly
    seq1 = Seq.new("test1", "ATCG")
    seq2 = Seq.new("test2", "atgg")
    assert_equal(1, seq1.hamming_distance(seq2))
  end

  # --- generate ---------------------------------------------------------------------

  # Both a negative and a zero length must be rejected.
  def test_Seq_generate_with_length_lt_1_raises
    assert_raise(SeqError) { @entry.generate(-10, "dna") }
    assert_raise(SeqError) { @entry.generate(0, "dna") }
  end

  def test_Seq_generate_with_bad_type_raises
    assert_raise(SeqError) { @entry.generate(10, "foo") }
  end

  # Type names are accepted in lower, upper and capitalised spelling.
  def test_Seq_generate_with_ok_type_dont_raise
    %w[dna DNA rna RNA protein Protein].each do |type|
      assert_nothing_raised { @entry.generate(10, type) }
    end
  end

  # --- subseq: returns a new entry; called as subseq(start[, length]) ---------------

  def test_Seq_subseq_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(-1, 1) }
  end

  def test_Seq_subseq_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 0) }
  end

  def test_Seq_subseq_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 5) }
  end

  def test_Seq_subseq_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("AT", @entry.subseq(0, 2).seq)
    assert_equal("CG", @entry.subseq(2, 2).seq)
  end

  # Without a length the subsequence runs to the end of the sequence.
  def test_Seq_subseq_without_len_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("ATCG", @entry.subseq(0).seq)
    assert_equal("CG",   @entry.subseq(2).seq)
  end

  # The qual string is sliced with the same start/length as the sequence.
  def test_Seq_subseq_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("ab", @entry.subseq(0, 2).qual)
    assert_equal("cd", @entry.subseq(2, 2).qual)
  end

  def test_Seq_subseq_without_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("abcd", @entry.subseq(0).qual)
    assert_equal("cd",   @entry.subseq(2).qual)
  end

  # --- subseq!: modifies the entry in place ------------------------------------------
  # Because subseq! changes @entry, the fixtures are re-assigned before each
  # second call within a test.

  def test_Seq_subseq_bang_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(-1, 1) }
  end

  def test_Seq_subseq_bang_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 0) }
  end

  def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 5) }
  end

  def test_Seq_subseq_bang_returns_correct_sequence
    @entry.seq  = "ATCG"
    @entry.subseq!(0, 2)
    assert_equal("AT", @entry.seq)
    @entry.seq  = "ATCG"
    @entry.subseq!(2, 2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_without_len_returns_correct_sequence
    @entry.seq  = "ATCG"
    @entry.subseq!(0)
    assert_equal("ATCG", @entry.seq)
    @entry.seq  = "ATCG"
    @entry.subseq!(2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0, 2)
    assert_equal("ab", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2, 2)
    assert_equal("cd", @entry.qual)
  end

  def test_Seq_subseq_bang_with_pos_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0)
    assert_equal("abcd", @entry.qual)
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2)
    assert_equal("cd", @entry.qual)
  end

  # --- subseq_rand ------------------------------------------------------------------

  # Requesting the full length (4 of 4) leaves only one possible result, which
  # keeps this test deterministic.
  def test_Seq_subseq_rand_returns_correct_sequence
    @entry.seq  = "ATCG"
    assert_equal("ATCG", @entry.subseq_rand(4).seq)
  end

  # --- composition -------------------------------------------------------------------

  # A residue that does not occur ("X") is expected to count as 0, not nil.
  def test_Seq_composition_returns_correctly
    @entry.seq = "AAAATTTCCG"
    assert_equal(4, @entry.composition["A"])
    assert_equal(3, @entry.composition["T"])
    assert_equal(2, @entry.composition["C"])
    assert_equal(1, @entry.composition["G"])
    assert_equal(0, @entry.composition["X"])
  end

  # --- homopol_max -------------------------------------------------------------------

  def test_Seq_homopol_max_returns_0_with_empty_sequence
    @entry.seq = ""
    assert_equal(0, @entry.homopol_max)
  end

  def test_Seq_homopol_max_returns_0_with_nil_sequence
    @entry.seq = nil
    assert_equal(0, @entry.homopol_max)
  end

  # The longest same-letter run in the fixture (ignoring case) is 5 long, so
  # an argument of 6 is expected to find nothing.
  def test_Seq_homopol_max_returns_0_when_not_found
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(0, @entry.homopol_max(6))
  end

  # Expected 5 corresponds to the mixed-case run "nnNnn", i.e. runs are
  # presumably matched case-insensitively — confirm against Seq#homopol_max.
  def test_Seq_homopol_max_returns_correctly
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(5, @entry.homopol_max(3))
  end

  # --- hard_mask / soft_mask ------------------------------------------------------------

  # NOTE(review): 33.33 is consistent with 2 N/n residues out of the 6
  # non-'-' characters, as a percentage rounded to two decimals — confirm
  # against Seq#hard_mask. The exact float comparison relies on that rounding.
  def test_Seq_hard_mask_returns_correctly
    @entry.seq = "--AAAANn"
    assert_equal(33.33, @entry.hard_mask)
  end

  # NOTE(review): 25.00 is consistent with 1 lower-case residue out of the 4
  # non-'-' characters, as a percentage — confirm against Seq#soft_mask.
  def test_Seq_soft_mask_returns_correctly
    @entry.seq = "--AAAa"
    assert_equal(25.00, @entry.soft_mask)
  end
end
382
383
384 __END__