3 # Copyright (C) 2011 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This software is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
33 %{@HD\tVN:1.3\tSO:coordinate
36 r001\t163\tref\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*
37 r002\t0\tref\t9\t30\t3S6M1P1I4M\t*\t0\t0\tAAAAGATAAGGATA\t*
38 r003\t0\tref\t9\t30\t5H6M\t*\t0\t0\tAGCTAA\t*\tNM:i:1
39 r004\t0\tref\t16\t30\t6M14N5M\t*\t0\t0\tATAGCTTCAGC\t*
40 r003\t16\tref\t29\t30\t6H5M\t*\t0\t0\tTAGGC\t*\tNM:i:0
41 r001\t83\tref\t37\t30\t9M\t=\t7\t-39\tCAGCGCCAT\t*
# Test::Unit test case for the Sam class: header validation in Sam.new,
# per-field validation in Sam#each, and conversion through Sam.to_bp.
44 class SamTest < Test::Unit::TestCase
# NOTE(review): this fixture assignment presumably sits inside a setup method
# whose `def` line is not visible here - confirm against the full file.
46 @sam = Sam.new(StringIO.new(SAM_DATA))
# A lone "@HD" header line (no further tags) must make Sam.new raise SamError.
def test_Sam_new_with_missing_version_number_raises
  header_io = StringIO.new("@HD")

  assert_raise(SamError) { Sam.new(header_io) }
end
# An @HD line whose only tag is XN (instead of VN) must make Sam.new raise SamError.
def test_Sam_new_with_bad_version_number_raises
  header_io = StringIO.new("@HD\tXN:1.3")

  assert_raise(SamError) { Sam.new(header_io) }
end
# The VN tag of an @HD line is exposed as header[:HD][:VN] and compares equal to 1.3.
def test_Sam_new_with_ok_version_number_returns_correctly
  parsed  = Sam.new(StringIO.new("@HD\tVN:1.3"))
  version = parsed.header[:HD][:VN]

  assert_equal(1.3, version)
end
# The sort order value "fish" on an @HD line must make Sam.new raise SamError.
def test_Sam_new_with_bad_sort_order_raises
  header_io = StringIO.new("@HD\tVN:1.3\tSO:fish")

  assert_raise(SamError) { Sam.new(header_io) }
end
# Each of the four listed sort orders must be parsed and returned unchanged
# as header[:HD][:SO].
def test_Sam_new_with_ok_sort_order_returns_correctly
  ["unknown", "unsorted", "queryname", "coordinate"].each do |sort_order|
    header_io = StringIO.new("@HD\tVN:1.3\tSO:#{sort_order}")
    parsed    = Sam.new(header_io)

    assert_equal(sort_order, parsed.header[:HD][:SO])
  end
end
# A lone "@SQ" header line (no tags at all) must make Sam.new raise SamError.
def test_Sam_new_with_missing_sequence_name_raises
  header_io = StringIO.new("@SQ")

  assert_raise(SamError) { Sam.new(header_io) }
end
# An @SQ line with an empty SN value must make Sam.new raise SamError.
def test_Sam_new_with_bad_sequence_name_raises
  header_io = StringIO.new("@SQ\tSN:")

  assert_raise(SamError) { Sam.new(header_io) }
end
# The sequence name becomes a symbol key under header[:SQ][:SN]; its value is
# a hash holding the remaining tags (here only LN, as the integer 45).
def test_Sam_new_with_ok_sequence_name_returns_correctly
  parsed   = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45"))
  expected = { :LN => 45 }

  assert_equal(expected, parsed.header[:SQ][:SN][:ref])
end
# Two @SQ lines sharing the same SN value must make Sam.new raise SamError.
def test_Sam_new_with_duplicate_sequence_name_raises
  header_io = StringIO.new("@SQ\tSN:ref\n@SQ\tSN:ref")

  assert_raise(SamError) { Sam.new(header_io) }
end
# An @SQ line that names a sequence but has no LN tag must make Sam.new raise SamError.
def test_Sam_new_with_missing_sequence_length_raises
  header_io = StringIO.new("@SQ\tSN:ref")

  assert_raise(SamError) { Sam.new(header_io) }
end
# A non-numeric LN value ("x") on an @SQ line must make Sam.new raise SamError.
def test_Sam_new_with_bad_sequence_length_raises
  header_io = StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:x")

  assert_raise(SamError) { Sam.new(header_io) }
end
# The LN tag of an @SQ line is returned as the integer 995 under the
# sequence's own entry in header[:SQ][:SN].
def test_Sam_new_with_ok_sequence_length_returns_correctly
  parsed = Sam.new(StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:995"))
  length = parsed.header[:SQ][:SN][:scaffold17_1_MH0083][:LN]

  assert_equal(995, length)
end
# An @SQ line carrying SN, LN, AS, M5, SP and UR tags parses, and reading the
# header afterwards raises nothing.
def test_Sam_new_with_full_SQ_dont_raise
  header_line = "@SQ\tSN:ref\tLN:45\tAS:ident\tM5:87e6b2aedf51b1f9c89becfab9267f41\tSP:E.coli\tUR:http://www.biopieces.org"
  parsed      = Sam.new(StringIO.new(header_line))

  assert_nothing_raised { parsed.header }
end
# An @RG line with an empty ID value must make Sam.new raise SamError.
def test_Sam_new_with_bad_read_group_identifier_raises
  header_io = StringIO.new("@RG\tID:")

  assert_raise(SamError) { Sam.new(header_io) }
end
# A lone "@RG" header line (no tags at all) must make Sam.new raise SamError.
def test_Sam_new_with_missing_read_group_identifier_raises
  header_io = StringIO.new("@RG")

  assert_raise(SamError) { Sam.new(header_io) }
end
# Two @RG lines sharing the same ID must make Sam.new raise SamError.
def test_Sam_new_with_duplicate_read_group_identifier_raises
  header_io = StringIO.new("@RG\tID:123\n@RG\tID:123")

  assert_raise(SamError) { Sam.new(header_io) }
end
# Two @RG lines with distinct IDs parse, and reading the header raises nothing.
def test_Sam_new_with_ok_read_group_identifier_dont_raise
  header_io = StringIO.new("@RG\tID:123\n@RG\tID:124")
  parsed    = Sam.new(header_io)

  assert_nothing_raised { parsed.header }
end
# The flow order value "3" on an @RG line must make Sam.new raise SamError.
def test_Sam_new_with_bad_flow_order_raises
  header_io = StringIO.new("@RG\tID:123\tFO:3")

  assert_raise(SamError) { Sam.new(header_io) }
end
# Both "*" and the letter string "ACMGRSVTWYHKDBN" are accepted as FO values:
# the header parses and can be read without raising.
def test_Sam_new_with_ok_flow_order_dont_raise
  [
    "@RG\tID:123\tFO:*",
    "@RG\tID:123\tFO:ACMGRSVTWYHKDBN"
  ].each do |header_line|
    parsed = Sam.new(StringIO.new(header_line))
    assert_nothing_raised { parsed.header }
  end
end
# The platform value "maersk" on an @RG line must make Sam.new raise SamError.
def test_Sam_new_with_bad_platform_raises
  header_io = StringIO.new("@RG\tID:123\tPL:maersk")

  assert_raise(SamError) { Sam.new(header_io) }
end
# The platform value "ILLUMINA" is accepted: the header parses and can be
# read without raising.
def test_Sam_new_with_ok_platform_dont_raise
  header_io = StringIO.new("@RG\tID:123\tPL:ILLUMINA")
  parsed    = Sam.new(header_io)

  assert_nothing_raised { parsed.header }
end
# An @PG line with an empty ID value must make Sam.new raise SamError.
def test_Sam_new_with_bad_program_identifier_raises
  header_io = StringIO.new("@PG\tID:")

  assert_raise(SamError) { Sam.new(header_io) }
end
# A lone "@PG" header line (no tags at all) must make Sam.new raise SamError.
def test_Sam_new_with_missing_program_identifier_raises
  header_io = StringIO.new("@PG")

  assert_raise(SamError) { Sam.new(header_io) }
end
# Two @PG lines sharing the same ID must make Sam.new raise SamError.
def test_Sam_new_with_duplicate_program_identifier_raises
  header_io = StringIO.new("@PG\tID:123\n@PG\tID:123")

  assert_raise(SamError) { Sam.new(header_io) }
end
# An @CO line followed only by a tab (empty comment text) must make Sam.new
# raise SamError.
def test_Sam_new_with_bad_comment_raises
  header_io = StringIO.new("@CO\t")

  assert_raise(SamError) { Sam.new(header_io) }
end
# An @CO line with comment text parses, and reading the header raises nothing.
def test_Sam_new_with_ok_comment_dont_raise
  header_io = StringIO.new("@CO\tfubar")
  parsed    = Sam.new(header_io)

  assert_nothing_raised { parsed.header }
end
# Checks that Sam#each raises SamError for records built from `fields`
# during each of eleven loop iterations.
# NOTE(review): `fields` is not defined in the lines visible here - presumably
# it is initialised before the loop and extended once per iteration so that
# every record has too few columns; confirm against the full file.
166 def test_Sam_each_with_bad_field_count_raises
# Exclusive range: i runs from 0 to 10.
169 (0 ... 11).each do |i|
# The record is the tab-joined fields terminated by the input record separator ($/).
170 sam = Sam.new(StringIO.new(fields.join("\t") + $/))
171 assert_raise(SamError) { sam.each }
# Iterating over the SAM_DATA fixture must not raise.
def test_Sam_each_with_ok_field_count_dont_raise
  fixture_io = StringIO.new(SAM_DATA)
  parsed     = Sam.new(fixture_io)

  assert_nothing_raised { parsed.each }
end
# A record whose first column is a single space must make Sam#each raise SamError.
def test_Sam_each_with_bad_qname_raises
  record = " \t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  parsed = Sam.new(StringIO.new(record))

  assert_raise(SamError) { parsed.each }
end
# A record whose first column is "*" must be iterated without raising.
# Uses the bare assert_nothing_raised form so that any exception - not only
# SamError - is reported as a test failure, in line with the other
# "dont_raise" tests in this file.
def test_Sam_each_with_ok_qname_dont_raise
  sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
  assert_nothing_raised { sam.each }
end
# Second-column (FLAG) values -1 and 65536 must each make Sam#each raise SamError.
def test_Sam_each_with_bad_flag_raises
  [
    "*\t-1\t*\t*\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t65536\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parsed.each }
  end
end
# Second-column (FLAG) values 0 and 65535 must both be iterated without raising.
def test_Sam_each_with_ok_flag_dont_raise
  [
    "*\t0\t*\t*\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t65535\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_nothing_raised { parsed.each }
  end
end
# A record whose third column is a single space must make Sam#each raise SamError.
def test_Sam_each_with_bad_rname_raises
  record = "*\t*\t \t*\t*\t*\t*\t*\t*\t*\t*\n"
  parsed = Sam.new(StringIO.new(record))

  assert_raise(SamError) { parsed.each }
end
# A record whose third column is "*" must be iterated without raising.
def test_Sam_each_with_ok_rname_dont_raise
  record = "*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  parsed = Sam.new(StringIO.new(record))

  assert_nothing_raised { parsed.each }
end
# Fourth-column (POS) values -1 and 536870912 must each make Sam#each raise SamError.
def test_Sam_each_with_bad_pos_raises
  [
    "*\t*\t*\t-1\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t536870912\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parsed.each }
  end
end
# Fourth-column (POS) values 0 and 536870911 must both be iterated without raising.
def test_Sam_each_with_ok_pos_dont_raise
  [
    "*\t*\t*\t0\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t536870911\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_nothing_raised { parsed.each }
  end
end
# Fifth-column (MAPQ) values -1 and 256 must each make Sam#each raise SamError.
def test_Sam_each_with_bad_mapq_raises
  [
    "*\t*\t*\t*\t-1\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t256\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parsed.each }
  end
end
# Fifth-column (MAPQ) values 0 and 255 must both be iterated without raising.
def test_Sam_each_with_ok_mapq_dont_raise
  [
    "*\t*\t*\t*\t0\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t255\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_nothing_raised { parsed.each }
  end
end
# A record whose seventh column is a single space must make Sam#each raise SamError.
def test_Sam_each_with_bad_rnext_raises
  record = "*\t*\t*\t*\t*\t*\t \t*\t*\t*\t*\n"
  parsed = Sam.new(StringIO.new(record))

  assert_raise(SamError) { parsed.each }
end
# Seventh-column (RNEXT) values "*", "=" and "!" must all be iterated without raising.
def test_Sam_each_with_ok_rnext_dont_raise
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t=\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t!\t*\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_nothing_raised { parsed.each }
  end
end
# Eighth-column (PNEXT) values -1 and 536870912 must each make Sam#each raise SamError.
def test_Sam_each_with_bad_pnext_raises
  [
    "*\t*\t*\t*\t*\t*\t*\t-1\t*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t536870912\t*\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parsed.each }
  end
end
# Eighth-column (PNEXT) values 0 and 536870911 must both be iterated without
# raising.
#
# The upper-bound record places 536870911 in the eighth column. It previously
# sat in the seventh (RNEXT) column, so the PNEXT upper bound was never
# exercised by this test.
def test_Sam_each_with_ok_pnext_dont_raise
  sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t0\t*\t*\t*\n"))
  assert_nothing_raised { sam.each }

  sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t536870911\t*\t*\t*\n"))
  assert_nothing_raised { sam.each }
end
# Ninth-column (TLEN) values -536870912 and 536870912 must each make Sam#each
# raise SamError.
def test_Sam_each_with_bad_tlen_raises
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t-536870912\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t*\t536870912\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parsed.each }
  end
end
# Ninth-column (TLEN) values -536870911 and 536870911 must both be iterated
# without raising.
def test_Sam_each_with_ok_tlen_dont_raise
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t-536870911\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t*\t536870911\t*\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_nothing_raised { parsed.each }
  end
end
# A record whose tenth column (SEQ) is a single space must make Sam#each raise SamError.
def test_Sam_each_with_bad_seq_raises
  record = "*\t*\t*\t*\t*\t*\t*\t*\t\*\t \t*\n"
  parsed = Sam.new(StringIO.new(record))

  assert_raise(SamError) { parsed.each }
end
# Tenth-column (SEQ) values "*" and "ATCGatcg=." must both be iterated without raising.
def test_Sam_each_with_ok_seq_dont_raise
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t*\t\*\tATCGatcg=.\t*\n"
  ].each do |record|
    parsed = Sam.new(StringIO.new(record))
    assert_nothing_raised { parsed.each }
  end
end
# A record whose eleventh column (QUAL) is a single space must make Sam#each
# raise SamError.
def test_Sam_each_with_bad_qual_raises
  record = "*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t \n"
  parsed = Sam.new(StringIO.new(record))

  assert_raise(SamError) { parsed.each }
end
# A record whose eleventh column (QUAL) is "@" must be iterated without raising.
def test_Sam_each_with_ok_qual_dont_raise
  record = "*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t@\n"
  parsed = Sam.new(StringIO.new(record))

  assert_nothing_raised { parsed.each }
end
# With an @SQ header declaring only "ref", a record whose third column is
# "MIS" must make Sam#each raise SamError.
def test_Sam_each_with_rname_missing_from_header_raises
  input  = "@SQ\tSN:ref\tLN:45\n*\t*\tMIS\t*\t*\t*\t*\t*\t\*\t*\t*\n"
  parsed = Sam.new(StringIO.new(input))

  assert_raise(SamError) { parsed.each }
end
# With an @SQ header declaring "ref", records whose third column is either
# "ref" or "*" must be iterated without raising.
def test_Sam_each_with_rname_present_in_header_dont_raise
  [
    "@SQ\tSN:ref\tLN:45\n*\t*\tref\t*\t*\t*\t*\t*\t\*\t*\t*\n",
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n"
  ].each do |input|
    parsed = Sam.new(StringIO.new(input))
    assert_nothing_raised { parsed.each }
  end
end
# With an @SQ header declaring only "ref", a record whose seventh column is
# "MIS" must make Sam#each raise SamError.
def test_Sam_each_with_rnext_missing_from_header_raises
  input  = "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\tMIS\t*\t\*\t*\t*\n"
  parsed = Sam.new(StringIO.new(input))

  assert_raise(SamError) { parsed.each }
end
# With an @SQ header declaring "ref", records whose seventh column is "*",
# "=" or "ref" must all be iterated without raising.
def test_Sam_each_with_rnext_present_in_header_dont_raise
  [
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n",
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t=\t*\t\*\t*\t*\n",
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\tref\t*\t\*\t*\t*\n"
  ].each do |input|
    parsed = Sam.new(StringIO.new(input))
    assert_nothing_raised { parsed.each }
  end
end
# Checks the individual keys of the hash returned by Sam.to_bp for a single
# alignment record (FLAG 0, CIGAR 37M1I62M, MD:Z:99).
# NOTE(review): `s` is not defined in the lines visible here - presumably it is
# the entry yielded by a `sam.each` block whose opening line is not shown; confirm.
349 def test_Sam_to_bp_returns_correctly
350 string = "ID00036734\t0\tgi48994873\t366089\t37\t37M1I62M\t*\t0\t0\tGTTCCGCTATCGGCTGAATTTGATTGCGAGTGAGATATTTTATGCCAGCCAGCCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGCTAACAGCG\t*\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:99\n"
352 sam = Sam.new(StringIO.new(string))
355 assert_equal("SAM", Sam.to_bp(s)[:REC_TYPE])
356 assert_equal("ID00036734", Sam.to_bp(s)[:Q_ID])
# NOTE(review): the record's FLAG is 0 yet "-" is expected here - confirm this
# matches the intended strand convention of Sam.to_bp.
357 assert_equal("-", Sam.to_bp(s)[:STRAND])
358 assert_equal("gi48994873", Sam.to_bp(s)[:S_ID])
359 assert_equal(366089, Sam.to_bp(s)[:S_BEG])
360 assert_equal(37, Sam.to_bp(s)[:MAPQ])
361 assert_equal("37M1I62M", Sam.to_bp(s)[:CIGAR])
362 assert_equal("GTTCCGCTATCGGCTGAATTTGATTGCGAGTGAGATATTTTATGCCAGCCAGCCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGCTAACAGCG", Sam.to_bp(s)[:SEQ])
# The single inserted base is reported as an "offset:->base" entry.
363 assert_equal("37:->T", Sam.to_bp(s)[:ALIGN])
# For a record with CIGAR 10M and MD:Z:10, Sam.to_bp is expected to return
# nil under :ALIGN.
# NOTE(review): `s` is not defined in the lines visible here - presumably
# yielded by a `sam.each` block whose opening line is not shown; confirm.
367 def test_Sam_to_bp_alignment_descriptor_without_mismatch_or_indel_returns_correctly
368 string = "q_id\t0\ts_id\t1000\t40\t10M\t*\t0\t0\tGTTCCGCTAT\t*\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:10\n"
370 sam = Sam.new(StringIO.new(string))
373 assert_equal(nil, Sam.to_bp(s)[:ALIGN])
# For a record with CIGAR 10M and MD:Z:0C8A, Sam.to_bp is expected to report
# the two mismatches under :ALIGN as comma-separated "offset:ref>read" entries.
# NOTE(review): `s` is not defined in the lines visible here - presumably
# yielded by a `sam.each` block whose opening line is not shown; confirm.
377 def test_Sam_to_bp_alignment_descriptor_with_mismatches_returns_correctly
378 string = "q_id\t0\ts_id\t1000\t40\t10M\t*\t0\t0\tgTTCCGCTAt\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:0C8A\n"
380 sam = Sam.new(StringIO.new(string))
383 assert_equal("0:C>g,9:A>t", Sam.to_bp(s)[:ALIGN])
# For a record with CIGAR 1I10M1I, Sam.to_bp is expected to report both
# inserted bases under :ALIGN as "offset:->base" entries.
# NOTE(review): `s` is not defined in the lines visible here - presumably
# yielded by a `sam.each` block whose opening line is not shown; confirm.
387 def test_Sam_to_bp_alignment_descriptor_with_insertions_returns_correctly
388 string = "q_id\t0\ts_id\t1000\t40\t1I10M1I\t*\t0\t0\taGTTCCGCTATc\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:12\n"
390 sam = Sam.new(StringIO.new(string))
393 assert_equal("0:->a,11:->c", Sam.to_bp(s)[:ALIGN])
# For a record with CIGAR 2D10M and MD:Z:^AC10, Sam.to_bp is expected to
# report both deleted bases under :ALIGN as "offset:base>-" entries.
# NOTE(review): `s` is not defined in the lines visible here - presumably
# yielded by a `sam.each` block whose opening line is not shown; confirm.
397 def test_Sam_to_bp_alignment_descriptor_with_deletions_returns_correctly
398 string = "q_id\t0\ts_id\t1000\t40\t2D10M\t*\t0\t0\tGTTCCGCTAT\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:^AC10\n"
400 sam = Sam.new(StringIO.new(string))
403 assert_equal("0:A>-,1:C>-", Sam.to_bp(s)[:ALIGN])