#!/usr/bin/env ruby
+$:.unshift File.join(File.dirname(__FILE__), '..', '..')
# Copyright (C) 2011 Martin A. Hansen.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'test/unit'
+require 'test/helper'
require 'maasha/sam'
-require 'pp'
require 'stringio'
-SAM_TEST =
+SAM_DATA =
%{@HD\tVN:1.3\tSO:coordinate
@SQ\tSN:ref\tLN:45
@CO\tMyComment
class SamTest < Test::Unit::TestCase
def setup
- @sam = Sam.new(StringIO.new(SAM_TEST))
+ @sam = Sam.new(StringIO.new(SAM_DATA)) # parser over the shared fixture; the constant is renamed from SAM_TEST to SAM_DATA
end
-# def test_Sam_header_without_entry_returns_nil
-# @sam.io = StringIO.new
-# assert_nil(@sam.header)
-# end
-
- def test_Sam_header_parse_with_missing_version_number_raises
- sam = Sam.new(StringIO.new("@HD"))
- assert_raise(SamError) { sam.header }
+ test "#new with missing version number raises" do # input is a bare "@HD" line with no VN tag
+ assert_raise(SamError) { Sam.new(StringIO.new("@HD")) } # SamError is now expected from Sam.new itself rather than from #header
end
- def test_Sam_header_parse_with_bad_version_number_raises
- sam = Sam.new(StringIO.new("@HD\tXN:1.3"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad version number raises" do # the @HD line carries an XN tag where VN belongs
+ assert_raise(SamError) { Sam.new(StringIO.new("@HD\tXN:1.3")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_version_number_returns_correctly
+ test "#new with ok version number returns correctly" do # VN:1.3 must be readable as 1.3 via header[:HD][:VN]
sam = Sam.new(StringIO.new("@HD\tVN:1.3"))
assert_equal(1.3, sam.header[:HD][:VN])
end
- def test_Sam_header_parse_with_bad_sort_order_raises
- sam = Sam.new(StringIO.new("@HD\tVN:1.3\tSO:fish"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad sort order raises" do # VN is valid here, so SO:fish is the only suspect value
+ assert_raise(SamError) { Sam.new(StringIO.new("@HD\tVN:1.3\tSO:fish")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_sort_order_returns_correctly
+ test "#new with ok sort order returns correctly" do # each listed SO value must come back unchanged from header[:HD][:SO]
%w{unknown unsorted queryname coordinate}.each do |order|
sam = Sam.new(StringIO.new("@HD\tVN:1.3\tSO:#{order}"))
assert_equal(order, sam.header[:HD][:SO])
end
end
- def test_Sam_header_parse_with_missing_sequence_name_raises
- sam = Sam.new(StringIO.new("@SQ"))
- assert_raise(SamError) { sam.header }
+ test "#new with missing sequence name raises" do # input is a bare "@SQ" line with no SN tag
+ assert_raise(SamError) { Sam.new(StringIO.new("@SQ")) } # construction alone must raise
end
- def test_Sam_header_parse_with_bad_sequence_name_raises
- sam = Sam.new(StringIO.new("@SQ\tSN:"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad sequence name raises" do # the SN tag is present but its value is empty
+ assert_raise(SamError) { Sam.new(StringIO.new("@SQ\tSN:")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_sequence_name_returns_correctly
+ test "#new with ok sequence name returns correctly" do # the SN value becomes a symbol key under header[:SQ][:SN], mapping to the line's remaining tags
sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45"))
assert_equal({:LN=>45}, sam.header[:SQ][:SN][:ref])
end
- def test_Sam_header_parse_with_duplicate_sequence_name_raises
- sam = Sam.new(StringIO.new("@SQ\tSN:ref\n@SQ\tSN:ref"))
- assert_raise(SamError) { sam.header[:SQ][:SN][:ref] }
+ test "#new with duplicate sequence name raises" do
+ assert_raise(SamError) { Sam.new(StringIO.new("@SQ\tSN:ref\n@SQ\tSN:ref")) }
end
- def test_Sam_header_parse_with_missing_sequence_length_raises
- sam = Sam.new(StringIO.new("@SQ\tSN:ref"))
- assert_raise(SamError) { sam.header }
+ test "#new with missing sequence length raises" do # SN is given but the LN tag is absent
+ assert_raise(SamError) { Sam.new(StringIO.new("@SQ\tSN:ref")) } # construction alone must raise
end
- def test_Sam_header_parse_with_bad_sequence_length_raises
- sam = Sam.new(StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:x"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad sequence length raises" do # LN holds the non-numeric value "x"
+ assert_raise(SamError) { Sam.new(StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:x")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_sequence_length_returns_correctly
+ test "#new with ok sequence length returns correctly" do # LN:995 must be readable as the integer 995 under the sequence's entry
sam = Sam.new(StringIO.new("@SQ\tSN:scaffold17_1_MH0083\tLN:995"))
assert_equal(995, sam.header[:SQ][:SN][:scaffold17_1_MH0083][:LN])
end
- def test_Sam_header_parse_with_full_SQ_dont_raise
- sam = Sam.new("@SQ\tSN:ref\tLN:45\tAS:ident\tM5:87e6b2aedf51b1f9c89becfab9267f41\tSP:E.coli\tUR:http://www.biopieces.org")
+ test "#new with full SQ dont raise" do # @SQ line with SN, LN, AS, M5, SP and UR tags all present
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\tAS:ident\tM5:87e6b2aedf51b1f9c89becfab9267f41\tSP:E.coli\tUR:http://www.biopieces.org")) # the removed version passed a bare String; the input is now wrapped in StringIO
assert_nothing_raised { sam.header }
end
- def test_Sam_header_parse_with_bad_read_group_identifier_raises
- sam = Sam.new(StringIO.new("@RG\tID:"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad read group identifier raises" do # the ID tag is present but its value is empty
+ assert_raise(SamError) { Sam.new(StringIO.new("@RG\tID:")) } # construction alone must raise
end
- def test_Sam_header_parse_with_missing_read_group_identifier_raises
- sam = Sam.new(StringIO.new("@RG"))
- assert_raise(SamError) { sam.header }
+ test "#new with missing read group identifier raises" do # input is a bare "@RG" line with no ID tag
+ assert_raise(SamError) { Sam.new(StringIO.new("@RG")) } # construction alone must raise
end
- def test_Sam_header_parse_with_duplicate_read_group_identifier_raises
- sam = Sam.new(StringIO.new("@RG\tID:123\n@RG\tID:123"))
- assert_raise(SamError) { sam.header }
+ test "#new with duplicate read group identifier raises" do # two @RG lines share ID:123
+ assert_raise(SamError) { Sam.new(StringIO.new("@RG\tID:123\n@RG\tID:123")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_read_group_identifier_dont_raise
+ test "#new with ok read group identifier dont raise" do # two @RG lines with distinct IDs (123 and 124) must be accepted
sam = Sam.new(StringIO.new("@RG\tID:123\n@RG\tID:124"))
assert_nothing_raised { sam.header }
end
- def test_Sam_header_parse_with_bad_flow_order_raises
- sam = Sam.new(StringIO.new("@RG\tID:123\tFO:3"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad flow order raises" do # ID is valid, so FO:3 is the only suspect value
+ assert_raise(SamError) { Sam.new(StringIO.new("@RG\tID:123\tFO:3")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_flow_order_dont_raise
+ test "#new with ok flow order dont raise" do # FO:* and FO:ACMGRSVTWYHKDBN must both be accepted
sam = Sam.new(StringIO.new("@RG\tID:123\tFO:*"))
assert_nothing_raised { sam.header }
sam = Sam.new(StringIO.new("@RG\tID:123\tFO:ACMGRSVTWYHKDBN"))
assert_nothing_raised { sam.header }
end
- def test_Sam_header_parse_with_bad_platform_raises
- sam = Sam.new(StringIO.new("@RG\tID:123\tPL:maersk"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad platform raises" do # ID is valid, so PL:maersk is the only suspect value
+ assert_raise(SamError) { Sam.new(StringIO.new("@RG\tID:123\tPL:maersk")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_platform_dont_raise
+ test "#new with ok platform dont raise" do # PL:ILLUMINA must be accepted
sam = Sam.new(StringIO.new("@RG\tID:123\tPL:ILLUMINA"))
assert_nothing_raised { sam.header }
end
- def test_Sam_header_parse_with_bad_program_identifier_raises
- sam = Sam.new(StringIO.new("@PG\tID:"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad program identifier raises" do # the ID tag is present but its value is empty
+ assert_raise(SamError) { Sam.new(StringIO.new("@PG\tID:")) } # construction alone must raise
end
- def test_Sam_header_parse_with_missing_program_identifier_raises
- sam = Sam.new(StringIO.new("@PG"))
- assert_raise(SamError) { sam.header }
+ test "#new with missing program identifier raises" do # input is a bare "@PG" line with no ID tag
+ assert_raise(SamError) { Sam.new(StringIO.new("@PG")) } # construction alone must raise
end
- def test_Sam_header_parse_with_duplicate_program_identifier_raises
- sam = Sam.new(StringIO.new("@PG\tID:123\n@PG\tID:123"))
- assert_raise(SamError) { sam.header }
+ test "#new with duplicate program identifier raises" do # two @PG lines share ID:123
+ assert_raise(SamError) { Sam.new(StringIO.new("@PG\tID:123\n@PG\tID:123")) } # construction alone must raise
end
- def test_Sam_header_parse_with_bad_comment_raises
- sam = Sam.new(StringIO.new("@CO\t"))
- assert_raise(SamError) { sam.header }
+ test "#new with bad comment raises" do # the @CO tag is followed by a tab but no comment text
+ assert_raise(SamError) { Sam.new(StringIO.new("@CO\t")) } # construction alone must raise
end
- def test_Sam_header_parse_with_ok_comment_dont_raise
+ test "#new with ok comment dont raise" do # an @CO line with non-empty text ("fubar") must be accepted
sam = Sam.new(StringIO.new("@CO\tfubar"))
assert_nothing_raised { sam.header }
end
+
+ test "#each with bad field count raises" do
+ fields = []
+
+ (0 ... 11).each do |i|
+ sam = Sam.new(StringIO.new(fields.join("\t") + $/))
+ assert_raise(SamError) { sam.each }
+ fields << "*"
+ end
+ end
+
+ test "#each with ok field count dont raise" do
+ sam = Sam.new(StringIO.new(SAM_DATA))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad qname raises" do
+ sam = Sam.new(StringIO.new(" \t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok qname dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised(SamError) { sam.each }
+ end
+
+ test "#each with bad flag raises" do
+ sam = Sam.new(StringIO.new("*\t-1\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t65536\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok flag dont raise" do
+ sam = Sam.new(StringIO.new("*\t0\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t65535\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad rname raises" do
+ sam = Sam.new(StringIO.new("*\t*\t \t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok rname dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad pos raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t-1\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t536870912\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok pos dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t0\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t536870911\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad mapq raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t-1\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t256\t*\t*\t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok mapq dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t0\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t255\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad rnext raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t \t*\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok rnext dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t=\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t!\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad pnext raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t-1\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t536870912\t*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok pnext dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t0\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t536870911\t*\t*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad tlen raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t-536870912\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t536870912\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok tlen dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t-536870911\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t536870911\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad seq raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t\*\t \t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok seq dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t\*\tATCGatcg=.\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with bad qual raises" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t \n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with ok qual dont raise" do
+ sam = Sam.new(StringIO.new("*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t@\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with rname missing from header raises" do
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\tMIS\t*\t*\t*\t*\t*\t\*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with rname present in header dont raise" do
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\tref\t*\t*\t*\t*\t*\t\*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#each with rnext missing from header raises" do
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\tMIS\t*\t\*\t*\t*\n"))
+ assert_raise(SamError) { sam.each }
+ end
+
+ test "#each with rnext present in header dont raise" do
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t*\t*\t\*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\t=\t*\t\*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+
+ sam = Sam.new(StringIO.new("@SQ\tSN:ref\tLN:45\n*\t*\t*\t*\t*\t*\tref\t*\t\*\t*\t*\n"))
+ assert_nothing_raised { sam.each }
+ end
+
+ test "#to_bp returns correctly" do
+ string = "ID00036734\t0\tgi48994873\t366089\t37\t37M1I62M\t*\t0\t0\tGTTCCGCTATCGGCTGAATTTGATTGCGAGTGAGATATTTTATGCCAGCCAGCCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGCTAACAGCG\t*\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:99\n"
+
+ sam = Sam.new(StringIO.new(string))
+
+ sam.each do |s|
+ assert_equal("SAM", Sam.to_bp(s)[:REC_TYPE])
+ assert_equal("ID00036734", Sam.to_bp(s)[:Q_ID])
+ assert_equal("-", Sam.to_bp(s)[:STRAND])
+ assert_equal("gi48994873", Sam.to_bp(s)[:S_ID])
+ assert_equal(366089, Sam.to_bp(s)[:S_BEG])
+ assert_equal(37, Sam.to_bp(s)[:MAPQ])
+ assert_equal("37M1I62M", Sam.to_bp(s)[:CIGAR])
+ assert_equal("GTTCCGCTATCGGCTGAATTTGATTGCGAGTGAGATATTTTATGCCAGCCAGCCAGACGCAGACGCGCCGAGACAGAACTTAATGGGCCCGCTAACAGCG", Sam.to_bp(s)[:SEQ])
+ assert_equal("37:->T", Sam.to_bp(s)[:ALIGN])
+ end
+ end
+
+ test "#to_bp alignment descriptor without mismatch or indel returns correctly" do
+ string = "q_id\t0\ts_id\t1000\t40\t10M\t*\t0\t0\tGTTCCGCTAT\t*\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:10\n"
+
+ sam = Sam.new(StringIO.new(string))
+
+ sam.each do |s|
+ assert_equal(nil, Sam.to_bp(s)[:ALIGN])
+ end
+ end
+
+ test "#to_bp alignment descriptor with mismatches returns correctly" do
+ string = "q_id\t0\ts_id\t1000\t40\t10M\t*\t0\t0\tgTTCCGCTAt\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:0C8A\n"
+
+ sam = Sam.new(StringIO.new(string))
+
+ sam.each do |s|
+ assert_equal("0:C>g,9:A>t", Sam.to_bp(s)[:ALIGN])
+ end
+ end
+
+ test "#to_bp alignment descriptor with insertions returns correctly" do
+ string = "q_id\t0\ts_id\t1000\t40\t1I10M1I\t*\t0\t0\taGTTCCGCTATc\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:12\n"
+
+ sam = Sam.new(StringIO.new(string))
+
+ sam.each do |s|
+ assert_equal("0:->a,11:->c", Sam.to_bp(s)[:ALIGN])
+ end
+ end
+
+ test "#to_bp alignment descriptor with deletions returns correctly" do
+ string = "q_id\t0\ts_id\t1000\t40\t2D10M\t*\t0\t0\tGTTCCGCTAT\t*\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:1\tXG:i:1\tMD:Z:^AC10\n"
+
+ sam = Sam.new(StringIO.new(string))
+
+ sam.each do |s|
+ assert_equal("0:A>-,1:C>-", Sam.to_bp(s)[:ALIGN])
+ end
+ end
end