2 $:.unshift File.join(File.dirname(__FILE__), '..', '..')
4 # Copyright (C) 2011 Martin A. Hansen.
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24 # This software is part of the Biopieces framework (www.biopieces.org).
26 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
34 %{@HD\tVN:1.3\tSO:coordinate
37 r001\t163\tref\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*
38 r002\t0\tref\t9\t30\t3S6M1P1I4M\t*\t0\t0\tAAAAGATAAGGATA\t*
39 r003\t0\tref\t9\t30\t5H6M\t*\t0\t0\tAGCTAA\t*\tNM:i:1
40 r004\t0\tref\t16\t30\t6M14N5M\t*\t0\t0\tATAGCTTCAGC\t*
41 r003\t16\tref\t29\t30\t6H5M\t*\t0\t0\tTAGGC\t*\tNM:i:0
42 r001\t83\tref\t37\t30\t9M\t=\t7\t-39\tCAGCGCCAT\t*
45 class SamTest < Test::Unit::TestCase
# Build a fresh parser over the shared SAM_DATA fixture before every test.
# The assignment lives in `setup` so that @sam becomes an instance variable of
# the running test case; evaluated directly in the class body it would be set
# once on the class object and never be visible inside a test block.
def setup
  @sam = Sam.new(StringIO.new(SAM_DATA))
end
# An @HD header line without any VN field must make Sam.new raise SamError.
test "#new with missing version number raises" do
  io = StringIO.new("@HD")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# An @HD line whose only field is tagged XN (not VN) must make Sam.new raise
# SamError.
test "#new with bad version number raises" do
  io = StringIO.new("@HD\tXN:1.3")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# A well-formed VN field must be readable as 1.3 under header[:HD][:VN].
test "#new with ok version number returns correctly" do
  header = Sam.new(StringIO.new("@HD\tVN:1.3")).header
  version = header[:HD][:VN]
  assert_equal(1.3, version)
end
# A sort order outside the accepted vocabulary ("fish") must make Sam.new
# raise SamError.
test "#new with bad sort order raises" do
  io = StringIO.new("@HD\tVN:1.3\tSO:fish")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# Each of the four sort orders listed here must be accepted and come back
# unchanged under header[:HD][:SO].
test "#new with ok sort order returns correctly" do
  sort_orders = %w{unknown unsorted queryname coordinate}

  sort_orders.each do |sort_order|
    parsed = Sam.new(StringIO.new("@HD\tVN:1.3\tSO:#{sort_order}"))
    assert_equal(sort_order, parsed.header[:HD][:SO])
  end
end
# A bare @SQ line (no SN field at all) must make Sam.new raise SamError.
test "#new with missing sequence name raises" do
  io = StringIO.new("@SQ")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# An @SQ line whose SN tag has an empty value must make Sam.new raise SamError.
test "#new with bad sequence name raises" do
  io = StringIO.new("@SQ\tSN:")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# A valid @SQ line is stored under header[:SQ][:SN] keyed by the sequence name
# (as a symbol), with the remaining fields as the value.
test "#new with ok sequence name returns correctly" do
  parsed = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45"))
  sequences = parsed.header[:SQ][:SN]
  expected = {:LN=>45}
  assert_equal(expected, sequences[:ref])
end
# Two @SQ lines that declare the same SN must make Sam.new raise SamError.
#
# Both lines carry a valid LN field: an @SQ line without LN is rejected on its
# own (see the "missing sequence length" test), so omitting it would let this
# test pass even if duplicate names were never detected.
test "#new with duplicate sequence name raises" do
  header = "@SQ\tSN:ref\tLN:45\n@SQ\tSN:ref\tLN:45"
  assert_raise(SamError) { Sam.new(StringIO.new(header)) }
end
# An @SQ line with a name but no LN field must make Sam.new raise SamError.
test "#new with missing sequence length raises" do
  io = StringIO.new("@SQ\tSN:ref")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# A non-numeric LN value ("x") must make Sam.new raise SamError.
test "#new with bad sequence length raises" do
  io = StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:x")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# A numeric LN value must be readable as the integer 995 under the entry for
# its sequence name.
test "#new with ok sequence length returns correctly" do
  parsed = Sam.new(StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:995"))
  entry = parsed.header[:SQ][:SN][:scaffold17_1_MH0083]
  assert_equal(995, entry[:LN])
end
# An @SQ line carrying SN, LN, AS, M5, SP and UR fields must parse, and reading
# the header afterwards must not raise.
test "#new with full SQ dont raise" do
  line = "@SQ\tSN:ref\tLN:45\tAS:ident\tM5:87e6b2aedf51b1f9c89becfab9267f41\tSP:E.coli\tUR:http://www.biopieces.org"
  parsed = Sam.new(StringIO.new(line))
  assert_nothing_raised do
    parsed.header
  end
end
# An @RG line whose ID tag has an empty value must make Sam.new raise SamError.
test "#new with bad read group identifier raises" do
  io = StringIO.new("@RG\tID:")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# A bare @RG line (no ID field at all) must make Sam.new raise SamError.
test "#new with missing read group identifier raises" do
  io = StringIO.new("@RG")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# Two @RG lines sharing the same ID must make Sam.new raise SamError.
test "#new with duplicate read group identifier raises" do
  io = StringIO.new("@RG\tID:123\n@RG\tID:123")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# Two @RG lines with distinct IDs must parse, and reading the header afterwards
# must not raise.
test "#new with ok read group identifier dont raise" do
  parsed = Sam.new(StringIO.new("@RG\tID:123\n@RG\tID:124"))
  assert_nothing_raised do
    parsed.header
  end
end
# A flow order value of "3" must make Sam.new raise SamError.
test "#new with bad flow order raises" do
  io = StringIO.new("@RG\tID:123\tFO:3")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# Both the "*" placeholder and a string of nucleotide codes must be accepted
# as a flow order; reading the header afterwards must not raise.
test "#new with ok flow order dont raise" do
  [
    "@RG\tID:123\tFO:*",
    "@RG\tID:123\tFO:ACMGRSVTWYHKDBN"
  ].each do |line|
    parsed = Sam.new(StringIO.new(line))
    assert_nothing_raised { parsed.header }
  end
end
# An unrecognised platform name ("maersk") must make Sam.new raise SamError.
test "#new with bad platform raises" do
  io = StringIO.new("@RG\tID:123\tPL:maersk")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# The platform name ILLUMINA must be accepted; reading the header afterwards
# must not raise.
test "#new with ok platform dont raise" do
  parsed = Sam.new(StringIO.new("@RG\tID:123\tPL:ILLUMINA"))
  assert_nothing_raised do
    parsed.header
  end
end
# An @PG line whose ID tag has an empty value must make Sam.new raise SamError.
test "#new with bad program identifier raises" do
  io = StringIO.new("@PG\tID:")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# A bare @PG line (no ID field at all) must make Sam.new raise SamError.
test "#new with missing program identifier raises" do
  io = StringIO.new("@PG")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# Two @PG lines sharing the same ID must make Sam.new raise SamError.
test "#new with duplicate program identifier raises" do
  io = StringIO.new("@PG\tID:123\n@PG\tID:123")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# An @CO line with nothing after the tab must make Sam.new raise SamError.
test "#new with bad comment raises" do
  io = StringIO.new("@CO\t")
  assert_raise(SamError) do
    Sam.new(io)
  end
end
# An @CO line with comment text must parse; reading the header afterwards must
# not raise.
test "#new with ok comment dont raise" do
  parsed = Sam.new(StringIO.new("@CO\tfubar"))
  assert_nothing_raised do
    parsed.header
  end
end
# Alignment lines with too few fields (0 through 10 of them) must each make
# Sam#each raise SamError.
#
# Each record is built from the loop index, so the test does not depend on an
# accumulator variable: `field_count` copies of "*" joined by tabs and
# terminated with the input record separator.
test "#each with bad field count raises" do
  (0...11).each do |field_count|
    record = Array.new(field_count, "*").join("\t") + $/
    sam = Sam.new(StringIO.new(record))
    assert_raise(SamError) { sam.each }
  end
end
# Iterating over the shared SAM_DATA fixture must not raise.
test "#each with ok field count dont raise" do
  parser = Sam.new(StringIO.new(SAM_DATA))
  assert_nothing_raised do
    parser.each
  end
end
# A single space in the first column (QNAME) must make Sam#each raise SamError.
test "#each with bad qname raises" do
  record = " \t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  parser = Sam.new(StringIO.new(record))
  assert_raise(SamError) do
    parser.each
  end
end
# The "*" placeholder in the first column (QNAME) must be accepted by Sam#each.
#
# assert_nothing_raised is called without an exception class, like every other
# "dont raise" test in this suite, so that any exception counts as a failure
# rather than only SamError.
test "#each with ok qname dont raise" do
  sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
  assert_nothing_raised { sam.each }
end
# FLAG (second column) values just outside the accepted range, -1 and 65536,
# must each make Sam#each raise SamError.
test "#each with bad flag raises" do
  [
    "*\t-1\t*\t*\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t65536\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parser.each }
  end
end
# FLAG (second column) values at both ends of the accepted range, 0 and 65535,
# must be accepted by Sam#each.
test "#each with ok flag dont raise" do
  [
    "*\t0\t*\t*\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t65535\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_nothing_raised { parser.each }
  end
end
# A single space in the third column (RNAME) must make Sam#each raise SamError.
test "#each with bad rname raises" do
  record = "*\t*\t \t*\t*\t*\t*\t*\t*\t*\t*\n"
  parser = Sam.new(StringIO.new(record))
  assert_raise(SamError) do
    parser.each
  end
end
# The "*" placeholder in the third column (RNAME) must be accepted by Sam#each.
test "#each with ok rname dont raise" do
  record = "*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"
  parser = Sam.new(StringIO.new(record))
  assert_nothing_raised do
    parser.each
  end
end
# POS (fourth column) values just outside the accepted range, -1 and
# 536870912, must each make Sam#each raise SamError.
test "#each with bad pos raises" do
  [
    "*\t*\t*\t-1\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t536870912\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parser.each }
  end
end
# POS (fourth column) values at both ends of the accepted range, 0 and
# 536870911, must be accepted by Sam#each.
test "#each with ok pos dont raise" do
  [
    "*\t*\t*\t0\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t536870911\t*\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_nothing_raised { parser.each }
  end
end
# MAPQ (fifth column) values just outside the accepted range, -1 and 256, must
# each make Sam#each raise SamError.
test "#each with bad mapq raises" do
  [
    "*\t*\t*\t*\t-1\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t256\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parser.each }
  end
end
# MAPQ (fifth column) values at both ends of the accepted range, 0 and 255,
# must be accepted by Sam#each.
test "#each with ok mapq dont raise" do
  [
    "*\t*\t*\t*\t0\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t255\t*\t*\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_nothing_raised { parser.each }
  end
end
# A single space in the seventh column (RNEXT) must make Sam#each raise
# SamError.
test "#each with bad rnext raises" do
  record = "*\t*\t*\t*\t*\t*\t \t*\t*\t*\t*\n"
  parser = Sam.new(StringIO.new(record))
  assert_raise(SamError) do
    parser.each
  end
end
# The seventh column (RNEXT) must accept "*", "=" and a printable name such as
# "!" without Sam#each raising.
test "#each with ok rnext dont raise" do
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t=\t*\t*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t!\t*\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_nothing_raised { parser.each }
  end
end
# PNEXT (eighth column) values just outside the accepted range, -1 and
# 536870912, must each make Sam#each raise SamError.
test "#each with bad pnext raises" do
  [
    "*\t*\t*\t*\t*\t*\t*\t-1\t*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t536870912\t*\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parser.each }
  end
end
# PNEXT (eighth column) values at both ends of the accepted range, 0 and
# 536870911, must be accepted by Sam#each.
#
# The upper-bound record places 536870911 in the eighth column, matching the
# lower-bound record and the "bad pnext" test. With the value one column to
# the left it would land in RNEXT and the PNEXT maximum would go untested.
test "#each with ok pnext dont raise" do
  sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t0\t*\t*\t*\n"))
  assert_nothing_raised { sam.each }

  sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t536870911\t*\t*\t*\n"))
  assert_nothing_raised { sam.each }
end
# TLEN (ninth column) values just outside the accepted range, -536870912 and
# 536870912, must each make Sam#each raise SamError.
test "#each with bad tlen raises" do
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t-536870912\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t*\t536870912\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_raise(SamError) { parser.each }
  end
end
# TLEN (ninth column) values at both ends of the accepted range, -536870911
# and 536870911, must be accepted by Sam#each.
test "#each with ok tlen dont raise" do
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t-536870911\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t*\t536870911\t*\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_nothing_raised { parser.each }
  end
end
# A single space in the tenth column (SEQ) must make Sam#each raise SamError.
test "#each with bad seq raises" do
  record = "*\t*\t*\t*\t*\t*\t*\t*\t\*\t \t*\n"
  parser = Sam.new(StringIO.new(record))
  assert_raise(SamError) do
    parser.each
  end
end
# The tenth column (SEQ) must accept both the "*" placeholder and a sequence
# made of upper/lower-case letters, "=" and ".".
test "#each with ok seq dont raise" do
  [
    "*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n",
    "*\t*\t*\t*\t*\t*\t*\t*\t\*\tATCGatcg=.\t*\n"
  ].each do |record|
    parser = Sam.new(StringIO.new(record))
    assert_nothing_raised { parser.each }
  end
end
# A single space in the eleventh column (QUAL) must make Sam#each raise
# SamError.
test "#each with bad qual raises" do
  record = "*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t \n"
  parser = Sam.new(StringIO.new(record))
  assert_raise(SamError) do
    parser.each
  end
end
# A printable character ("@") in the eleventh column (QUAL) must be accepted by
# Sam#each.
test "#each with ok qual dont raise" do
  record = "*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t@\n"
  parser = Sam.new(StringIO.new(record))
  assert_nothing_raised do
    parser.each
  end
end
# When the header declares only sequence "ref", an alignment whose RNAME is
# "MIS" must make Sam#each raise SamError.
test "#each with rname missing from header raises" do
  input = "@SQ\tSN:ref\tLN:45\n*\t*\tMIS\t*\t*\t*\t*\t*\t\*\t*\t*\n"
  parser = Sam.new(StringIO.new(input))
  assert_raise(SamError) do
    parser.each
  end
end
# When the header declares sequence "ref", an alignment whose RNAME is either
# "ref" or the "*" placeholder must be accepted by Sam#each.
test "#each with rname present in header dont raise" do
  [
    "@SQ\tSN:ref\tLN:45\n*\t*\tref\t*\t*\t*\t*\t*\t\*\t*\t*\n",
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n"
  ].each do |input|
    parser = Sam.new(StringIO.new(input))
    assert_nothing_raised { parser.each }
  end
end
# When the header declares only sequence "ref", an alignment whose RNEXT is
# "MIS" must make Sam#each raise SamError.
test "#each with rnext missing from header raises" do
  input = "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\tMIS\t*\t\*\t*\t*\n"
  parser = Sam.new(StringIO.new(input))
  assert_raise(SamError) do
    parser.each
  end
end
# When the header declares sequence "ref", an alignment whose RNEXT is "*",
# "=" or "ref" must be accepted by Sam#each.
test "#each with rnext present in header dont raise" do
  [
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n",
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t=\t*\t\*\t*\t*\n",
    "@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\tref\t*\t\*\t*\t*\n"
  ].each do |input|
    parser = Sam.new(StringIO.new(input))
    assert_nothing_raised { parser.each }
  end
end
# Converts one aligned SAM record with Sam.to_bp and checks the keys of the
# resulting hash: record type, query/subject ids, strand, start position,
# mapping quality, CIGAR string, sequence and alignment descriptor.
test "#to_bp returns correctly" do
  string = "ID00036734\t0\tgi48994873\t366089\t37\t37M1I62M\t*\t0\t0\tGTTCCGCTATCGGCTGAATTTGATTGCGAGTGAGATATTTTATGCCAGCCAGCCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGCTAACAGCG\t*\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:99\n"

  sam = Sam.new(StringIO.new(string))

  # Bind `s` by iterating over the parsed entries; without this loop `s` is an
  # undefined name. Assumes Sam#each yields one entry per alignment line —
  # TODO confirm against Sam#each.
  sam.each do |s|
    bp = Sam.to_bp(s) # convert once and reuse for every assertion

    assert_equal("SAM", bp[:REC_TYPE])
    assert_equal("ID00036734", bp[:Q_ID])
    # NOTE(review): FLAG is 0 in this record yet "-" is expected — confirm
    # against Sam.to_bp that this is the intended strand.
    assert_equal("-", bp[:STRAND])
    assert_equal("gi48994873", bp[:S_ID])
    assert_equal(366089, bp[:S_BEG])
    assert_equal(37, bp[:MAPQ])
    assert_equal("37M1I62M", bp[:CIGAR])
    assert_equal("GTTCCGCTATCGGCTGAATTTGATTGCGAGTGAGATATTTTATGCCAGCCAGCCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGCTAACAGCG", bp[:SEQ])
    assert_equal("37:->T", bp[:ALIGN])
  end
end
# A record with CIGAR 10M and MD:Z:10 (no mismatch, no indel) must produce no
# alignment descriptor: Sam.to_bp(...)[:ALIGN] is nil.
test "#to_bp alignment descriptor without mismatch or indel returns correctly" do
  string = "q_id\t0\ts_id\t1000\t40\t10M\t*\t0\t0\tGTTCCGCTAT\t*\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:10\n"

  sam = Sam.new(StringIO.new(string))

  # Bind `s` by iterating over the parsed entries; without this loop `s` is an
  # undefined name. Assumes Sam#each yields one entry per alignment line —
  # TODO confirm against Sam#each.
  sam.each do |s|
    assert_nil(Sam.to_bp(s)[:ALIGN])
  end
end
# A record with MD:Z:0C8A (mismatches at the first and last base) must produce
# the alignment descriptor "0:C>g,9:A>t".
test "#to_bp alignment descriptor with mismatches returns correctly" do
  string = "q_id\t0\ts_id\t1000\t40\t10M\t*\t0\t0\tgTTCCGCTAt\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:0C8A\n"

  sam = Sam.new(StringIO.new(string))

  # Bind `s` by iterating over the parsed entries; without this loop `s` is an
  # undefined name. Assumes Sam#each yields one entry per alignment line —
  # TODO confirm against Sam#each.
  sam.each do |s|
    assert_equal("0:C>g,9:A>t", Sam.to_bp(s)[:ALIGN])
  end
end
# A record with CIGAR 1I10M1I (one inserted base at each end of the read) must
# produce the alignment descriptor "0:->a,11:->c".
test "#to_bp alignment descriptor with insertions returns correctly" do
  string = "q_id\t0\ts_id\t1000\t40\t1I10M1I\t*\t0\t0\taGTTCCGCTATc\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:12\n"

  sam = Sam.new(StringIO.new(string))

  # Bind `s` by iterating over the parsed entries; without this loop `s` is an
  # undefined name. Assumes Sam#each yields one entry per alignment line —
  # TODO confirm against Sam#each.
  sam.each do |s|
    assert_equal("0:->a,11:->c", Sam.to_bp(s)[:ALIGN])
  end
end
# A record with CIGAR 2D10M and MD:Z:^AC10 (two deleted reference bases at the
# start) must produce the alignment descriptor "0:A>-,1:C>-".
test "#to_bp alignment descriptor with deletions returns correctly" do
  string = "q_id\t0\ts_id\t1000\t40\t2D10M\t*\t0\t0\tGTTCCGCTAT\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:^AC10\n"

  sam = Sam.new(StringIO.new(string))

  # Bind `s` by iterating over the parsed entries; without this loop `s` is an
  # undefined name. Assumes Sam#each yields one entry per alignment line —
  # TODO confirm against Sam#each.
  sam.each do |s|
    assert_equal("0:A>-,1:C>-", Sam.to_bp(s)[:ALIGN])
  end
end