git.donarmstrong.com Git - biopieces.git/blob - code_ruby/test/maasha/test_seq.rb
added to_fastq method seq.rb
[biopieces.git] / code_ruby / test / maasha / test_seq.rb
1 #!/usr/bin/env ruby
2
3 require 'maasha/seq'
4 require 'test/unit'
5 require 'pp'
6
# Unit tests for the Seq class loaded from 'maasha/seq'.
#
# Each test starts from the bare Seq instance created in #setup, unless it
# builds its own entry with Seq.new(name, sequence). Test method names are
# left untouched because Test::Unit discovers tests by their `test_` prefix.
class TestSeq < Test::Unit::TestCase
  # Tolerance used when comparing the Float percentages returned by the
  # masking methods. It is tight enough that a value not rounded to two
  # decimals (e.g. 33.3333...) still fails, while avoiding exact Float
  # equality.
  MASK_DELTA = 0.001

  # Provides a fresh Seq with no name, sequence, type or quality set.
  def setup
    @entry = Seq.new
  end

  # Disabled test, kept for reference:
  #
  #  def test_Seq_autoremoves_whitespace_newlines_and_carriage_returns
  #    dna = Seq.new
  #    dna.seq = "A\tT\r\tC\nG  "
  #    assert_equal("ATCG", dna.seq)
  #  end

  # --- Type predicates ------------------------------------------------------
  # assert_equal is used (rather than assert(x == true)) so that a failure
  # reports the value actually returned by the predicate.

  def test_Seq_is_dna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_dna?)
  end

  def test_Seq_is_dna_with_dna_sequence_type_returns_true
    @entry.type = 'dna'
    assert_equal(true, @entry.is_dna?)
  end

  def test_Seq_is_rna_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_rna?)
  end

  def test_Seq_is_rna_with_rna_sequence_type_returns_true
    @entry.type = 'rna'
    assert_equal(true, @entry.is_rna?)
  end

  def test_Seq_is_protein_with_no_sequence_type_returns_false
    assert_equal(false, @entry.is_protein?)
  end

  def test_Seq_is_protein_with_protein_sequence_type_returns_true
    @entry.type = 'protein'
    assert_equal(true, @entry.is_protein?)
  end

  # --- Type guessing --------------------------------------------------------
  # The fixtures differ by a single residue: 'F' is expected to yield
  # 'protein', 'U' to yield 'rna', and neither to yield 'dna'.

  def test_Seq_type_guess_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess }
  end

  def test_Seq_type_guess_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    assert_equal('protein', @entry.type_guess)
  end

  def test_Seq_type_guess_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    assert_equal('rna', @entry.type_guess)
  end

  def test_Seq_type_guess_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    assert_equal('dna', @entry.type_guess)
  end

  # The "EM" (exclamation mark) variants check that type_guess! stores the
  # guessed type on the entry itself.

  def test_Seq_type_guess_EM_without_sequence_raises
    assert_raise(SeqError) { @entry.type_guess! }
  end

  def test_Seq_type_guess_EM_with_protein_returns_protein
    @entry.seq = 'atcatcrFgatcg'
    @entry.type_guess!
    assert_equal('protein', @entry.type)
  end

  def test_Seq_type_guess_EM_with_rna_returns_rna
    @entry.seq = 'atcatcrUgatcg'
    @entry.type_guess!
    assert_equal('rna', @entry.type)
  end

  def test_Seq_type_guess_EM_with_dna_returns_dna
    @entry.seq = 'atcatcgatcg'
    @entry.type_guess!
    assert_equal('dna', @entry.type)
  end

  # --- Length and indels ----------------------------------------------------

  def test_Seq_length_is_correct
    @entry.seq = 'ATCG'
    assert_equal(4, @entry.length)
  end

  def test_Seq_indels_is_correct
    # The four trailing characters '.', '-', '~' and '_' are the ones
    # expected to be counted.
    @entry.seq = 'ATCG.-~_'
    assert_equal(4, @entry.indels)
  end

  # --- DNA <-> RNA conversion -----------------------------------------------

  def test_Seq_to_rna_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  def test_Seq_to_rna_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_rna }
  end

  def test_Seq_to_rna_transcribes_correctly
    # Case of each residue must be preserved.
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("AUCGaucg", @entry.to_rna)
  end

  def test_Seq_to_rna_changes_entry_type_to_rna
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    @entry.to_rna
    assert_equal("rna", @entry.type)
  end

  def test_Seq_to_dna_raises_if_no_sequence
    @entry.type = 'rna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  def test_Seq_to_dna_raises_on_bad_type
    @entry.seq  = 'AUCG'
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.to_dna }
  end

  def test_Seq_to_dna_transcribes_correctly
    # Case of each residue must be preserved.
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("ATCGatcg", @entry.to_dna)
  end

  def test_Seq_to_dna_changes_entry_type_to_dna
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    @entry.to_dna
    assert_equal("dna", @entry.type)
  end

  # --- Output formats -------------------------------------------------------

  def test_Seq_to_bp_returns_correct_record
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal({:SEQ_NAME=>"test", :SEQ=>"ATCG", :SEQ_LEN=>4}, @entry.to_bp)
  end

  def test_Seq_to_bp_raises_on_missing_seq_name
    @entry.seq = 'ATCG'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_bp_raises_on_missing_sequence
    @entry.seq_name = 'test'
    assert_raise(SeqError) { @entry.to_bp }
  end

  def test_Seq_to_fasta_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    assert_equal(">test\nATCG\n", @entry.to_fasta)
  end

  def test_Seq_to_fasta_wraps_correctly
    # The argument is the line width: 4 residues wrapped at 2 give 2 lines.
    entry = Seq.new("test", "ATCG")
    assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
  end

  def test_Seq_to_fastq_returns_correct_entry
    @entry.seq_name = 'test'
    @entry.seq      = 'ATCG'
    @entry.qual     = 'hhhh'
    assert_equal("@test\nATCG\n+\nhhhh\n", @entry.to_fastq)
  end

  # --- Key encoding ---------------------------------------------------------

  def test_Seq_to_key_with_bad_residue_raises
    entry = Seq.new("test", "AUCG")
    assert_raise(SeqError) { entry.to_key }
  end

  def test_Seq_to_key_returns_correctly
    # 54 == 0b00_11_01_10; presumably two bits per base
    # (A=00, T=11, C=01, G=10) -- confirm against Seq#to_key.
    entry = Seq.new("test", "ATCG")
    assert_equal(54, entry.to_key)
  end

  # --- Reverse and complement -----------------------------------------------

  def test_Seq_reverse_returns_correctly
    @entry.seq = "ATCG"
    assert_equal("GCTA", @entry.reverse)
  end

  def test_Seq_complement_raises_if_no_sequence
    @entry.type = 'dna'
    assert_raise(SeqError) { @entry.complement }
  end

  def test_Seq_complement_raises_on_bad_type
    @entry.seq  = 'ATCG'
    @entry.type = 'protein'
    assert_raise(SeqError) { @entry.complement }
  end

  def test_Seq_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("TAGCtagc", @entry.complement)
  end

  def test_Seq_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("UAGCuagc", @entry.complement)
  end

  def test_Seq_reverse_complement_for_DNA_is_correct
    @entry.seq  = 'ATCGatcg'
    @entry.type = 'dna'
    assert_equal("cgatCGAT", @entry.reverse_complement)
  end

  def test_Seq_reverse_complement_for_RNA_is_correct
    @entry.seq  = 'AUCGaucg'
    @entry.type = 'rna'
    assert_equal("cgauCGAU", @entry.reverse_complement)
  end

  # --- Hamming distance -----------------------------------------------------

  def test_Seq_hamming_distance_returns_correctly
    # Only the third position (C vs g) differs once case is ignored, so an
    # expected distance of 1 requires a case-insensitive comparison.
    seq1 = Seq.new("test1", "ATCG")
    seq2 = Seq.new("test2", "atgg")
    assert_equal(1, seq1.hamming_distance(seq2))
  end

  # --- Sequence generation --------------------------------------------------

  def test_Seq_generate_with_length_lt_1_raises
    assert_raise(SeqError) { @entry.generate(-10, "dna") }
    assert_raise(SeqError) { @entry.generate(0, "dna") }
  end

  def test_Seq_generate_with_bad_type_raises
    assert_raise(SeqError) { @entry.generate(10, "foo") }
  end

  def test_Seq_generate_with_ok_type_dont_raise
    # Both lower-case and capitalised/upper-case type names must be accepted.
    %w[dna DNA rna RNA protein Protein].each do |seq_type|
      assert_nothing_raised { @entry.generate(10, seq_type) }
    end
  end

  # --- subseq (returns a new entry) -----------------------------------------
  # Arguments are (start, length) with a zero-based start; length is optional.

  def test_Seq_subseq_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(-1, 1) }
  end

  def test_Seq_subseq_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 0) }
  end

  def test_Seq_subseq_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq(0, 5) }
  end

  def test_Seq_subseq_returns_correct_sequence
    @entry.seq = "ATCG"
    assert_equal("AT", @entry.subseq(0, 2).seq)
    assert_equal("CG", @entry.subseq(2, 2).seq)
  end

  def test_Seq_subseq_without_len_returns_correct_sequence
    # Without a length the subsequence runs to the end of the sequence.
    @entry.seq = "ATCG"
    assert_equal("ATCG", @entry.subseq(0).seq)
    assert_equal("CG",   @entry.subseq(2).seq)
  end

  def test_Seq_subseq_returns_correct_qual
    # Quality scores must be sliced with the same coordinates as the sequence.
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("ab", @entry.subseq(0, 2).qual)
    assert_equal("cd", @entry.subseq(2, 2).qual)
  end

  def test_Seq_subseq_without_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    assert_equal("abcd", @entry.subseq(0).qual)
    assert_equal("cd",   @entry.subseq(2).qual)
  end

  # --- subseq! (modifies the entry in place) --------------------------------
  # Because subseq! changes @entry, the multi-step tests below restore the
  # original sequence (and quality) before each further call.

  def test_Seq_subseq_bang_with_start_lt_0_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(-1, 1) }
  end

  def test_Seq_subseq_bang_with_length_lt_1_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 0) }
  end

  def test_Seq_subseq_bang_with_start_plus_length_gt_seq_raises
    @entry.seq = "ATCG"
    assert_raise(SeqError) { @entry.subseq!(0, 5) }
  end

  def test_Seq_subseq_bang_returns_correct_sequence
    @entry.seq = "ATCG"
    @entry.subseq!(0, 2)
    assert_equal("AT", @entry.seq)

    @entry.seq = "ATCG"
    @entry.subseq!(2, 2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_without_len_returns_correct_sequence
    @entry.seq = "ATCG"
    @entry.subseq!(0)
    assert_equal("ATCG", @entry.seq)

    @entry.seq = "ATCG"
    @entry.subseq!(2)
    assert_equal("CG", @entry.seq)
  end

  def test_Seq_subseq_bang_with_pos_and_len_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0, 2)
    assert_equal("ab", @entry.qual)

    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2, 2)
    assert_equal("cd", @entry.qual)
  end

  def test_Seq_subseq_bang_with_pos_returns_correct_qual
    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(0)
    assert_equal("abcd", @entry.qual)

    @entry.seq  = "ATCG"
    @entry.qual = "abcd"
    @entry.subseq!(2)
    assert_equal("cd", @entry.qual)
  end

  def test_Seq_subseq_rand_returns_correct_sequence
    # Requesting the full length leaves only one possible subsequence, which
    # keeps the result deterministic.
    @entry.seq = "ATCG"
    assert_equal("ATCG", @entry.subseq_rand(4).seq)
  end

  # --- Composition and homopolymers -----------------------------------------

  def test_Seq_composition_returns_correctly
    @entry.seq = "AAAATTTCCG"
    assert_equal(4, @entry.composition["A"])
    assert_equal(3, @entry.composition["T"])
    assert_equal(2, @entry.composition["C"])
    assert_equal(1, @entry.composition["G"])
    # A residue absent from the sequence must report 0 rather than nil.
    assert_equal(0, @entry.composition["X"])
  end

  def test_Seq_homopol_max_returns_0_with_empty_sequence
    @entry.seq = ""
    assert_equal(0, @entry.homopol_max)
  end

  def test_Seq_homopol_max_returns_0_with_nil_sequence
    @entry.seq = nil
    assert_equal(0, @entry.homopol_max)
  end

  def test_Seq_homopol_max_returns_0_when_not_found
    # The longest run in the fixture is 5 ("nnNnn"), shorter than the 6 asked for.
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(0, @entry.homopol_max(6))
  end

  def test_Seq_homopol_max_returns_correctly
    # Mixed case is deliberate: "nnNnn" must count as one run of length 5.
    @entry.seq = "AtTcCcGggGnnNnn"
    assert_equal(5, @entry.homopol_max(3))
  end

  # --- Masking percentages --------------------------------------------------
  # Floats are compared with a tolerance instead of exact equality.

  def test_Seq_hard_mask_returns_correctly
    # 2 of the 6 non-indel residues are N/n; presumably that is how the
    # expected 33.33 is derived -- confirm against Seq#hard_mask.
    @entry.seq = "--AAAANn"
    assert_in_delta(33.33, @entry.hard_mask, MASK_DELTA)
  end

  def test_Seq_soft_mask_returns_correctly
    # 1 of the 4 non-indel residues is lower-case; presumably that is how the
    # expected 25.00 is derived -- confirm against Seq#soft_mask.
    @entry.seq = "--AAAa"
    assert_in_delta(25.00, @entry.soft_mask, MASK_DELTA)
  end
end
389
390
391 __END__