require 'maasha/filesys'
require 'maasha/seq'
-# Error class for all exceptions to do with Genbank.
-class SamError < StandardError; end
-
# Matches a SAM header line: '@' + two-letter record type followed by one or
# more tab-separated TAG:VALUE fields.
REGEX_HEADER = /^@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/
# Matches a SAM comment header line (@CO followed by a tab and free text).
REGEX_COMMENT = /^@CO\t.*/
+FLAG_MULTI = 0x1 # Template has multiple fragments in sequencing
+FLAG_ALIGNED = 0x2 # Each fragment is properly aligned according to the aligner
+FLAG_UNMAPPED = 0x4 # Fragment is unmapped
+FLAG_NEXT_UNMAPPED = 0x8 # Next fragment in the template is unmapped
+FLAG_REVCOMP = 0x10 # SEQ is reverse complemented
+FLAG_NEXT_REVCOMP = 0x20 # SEQ of the next fragment in the template is reverse complemented
+FLAG_FIRST = 0x40 # First fragment in the template
+FLAG_LAST = 0x80 # Last fragment in the template
+FLAG_SECONDARY_ALIGNMENT = 0x100 # Secondary alignment
+FLAG_QUALITY_FAIL = 0x200 # Fragment did not pass quality controls
+FLAG_DUPLICATES = 0x400 # PCR or optical duplicate
+
+# Error class for all exceptions to do with SAM.
+class SamError < StandardError; end
+
# Class to parse and write SAM files.
class Sam < Filesys
attr_accessor :io, :header
+ # Class method to convert a SAM entry
+ # to a Biopiece record.
+ def self.to_bp(sam)
+ bp = {}
+
+ bp[:REC_TYPE] = 'SAM'
+ bp[:Q_ID] = sam[:QNAME]
+ bp[:STRAND] = sam[:FLAG].revcomp? ? '-' : '+'
+ bp[:S_ID] = sam[:RNAME]
+ bp[:S_BEG] = sam[:POS]
+ bp[:MAPQ] = sam[:MAPQ]
+ bp[:CIGAR] = sam[:CIGAR]
+
+ unless sam[:RNEXT] == '*'
+ bp[:Q_ID2] = sam[:RNEXT]
+ bp[:S_BEG2] = sam[:PNEXT]
+ bp[:TLEN] = sam[:TLEN]
+ end
+
+ bp[:SEQ] = sam[:SEQ].seq
+
+ unless sam[:SEQ].qual.nil?
+ bp[:SCORES] = sam[:SEQ].convert_phred2illumina!.qual
+ end
+
+ if sam.has_key? :NM and sam[:NM].to_i > 0
+ bp[:ALIGN] = self.align_descriptors(sam)
+ end
+
+ bp
+ end
+
+ # Create align descriptors according to the KISS format description:
+ # http://code.google.com/p/biopieces/wiki/KissFormat
+ def self.align_descriptors(sam)
+ offset = 0
+ insertions = 0
+ align = []
+
+ # Insertions
+ sam[:CIGAR].scan(/([0-9]+)([MIDNSHPX=])/).each do |len, op|
+ len = len.to_i
+
+ if op == 'I'
+ (0 ... len).each_with_index do |i|
+ nt = sam[:SEQ].seq[offset + i]
+
+ align << [offset + i, "->#{nt}"]
+ end
+ end
+
+ offset += len
+ end
+
+ offset = 0
+ deletions = 0
+
+ sam[:MD].scan(/\d+|\^[A-Z]+|[A-Z]+/).each do |m|
+ if m =~ /\d+/ # Matches
+ offset += m.to_i
+ elsif m[0] == '^' # Deletions
+ m[1 .. -1].each_char do |nt|
+ align << [offset, "#{nt}>-"]
+
+ deletions += 1
+ offset += 1
+ end
+ else # Mismatches
+ m.each_char do |nt|
+ nt2 = sam[:SEQ].seq[offset - deletions]
+
+ align << [offset, "#{nt}>#{nt2}"]
+
+ offset += 1
+ end
+ end
+ end
+
+ align.sort_by { |a| a.first }.map { |k,v| "#{k}:#{v}" }.join(",")
+ end
+
# Method to initialize a Sam object.
def initialize(io = nil)
@io = io
entry = {}
entry[:QNAME] = qname
- entry[:FLAG] = flag
+ entry[:FLAG] = Flag.new(flag)
entry[:RNAME] = rname
entry[:POS] = pos
entry[:MAPQ] = mapq
entry[:RNEXT] = rnext
entry[:PNEXT] = pnext
entry[:TLEN] = tlen
- entry[:SEQ] = Seq.new(qname, seq)
+ entry[:SEQ] = (qual == '*') ? Seq.new(qname, seq) : Seq.new(qname, seq, qual)
entry[:QUAL] = qual
+ # Optional fields - where some are really important! HATE HATE HATE SAM!!!
+
+ fields[11 .. -1].each do |field|
+ tag, type, val = field.split(':')
+
+ raise SamError, "Non-unique optional tag: #{tag}" if entry.has_key? tag.to_sym
+
+ # A [!-~] Printable character
+
+ # i [-+]?[0-9]+ Signed 32-bit integer
+ if type == 'i'
+ raise SamError, "Bad tag in optional field: #{field}" unless val =~ /^[-+]?[0-9]+$/
+ val = val.to_i
+ end
+
+ # f [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? Single-precision floating number
+ # Z [ !-~]+ Printable string, including space
+ # H [0-9A-F]+ Byte array in the Hex format
+ # B [cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+ Integer or numeric array
+
+ entry[tag.to_sym] = val
+ end
+
entry
end
# Method to check that a quality string only holds printable
# characters in the range '!' to '~'.
#
# qual - String from the QUAL field.
#
# Returns nil when the string is valid.
# Raises SamError when qual is empty, nil, or contains other characters.
def check_qual(qual)
  # \A and \Z anchor the whole string; ^ and $ anchor per line and would
  # accept a multi-line value as long as one line is valid. \Z still
  # tolerates a single trailing newline.
  raise SamError, "Bad qual: #{qual}" unless qual =~ /\A[!-~]+\Z/
end
+
+ # Method to deconvolute the SAM flag field.
+ class Flag
+ attr_reader :flag
+
+ # Method to initialize a Flag object.
+ def initialize(flag)
+ @flag = flag
+ end
+
+ # Method to test if template have
+ # multiple fragments in sequencing.
+ def multi?
+ flag & FLAG_MULTI
+ end
+
+ # Method to test if each fragment
+ # properly aligned according to the aligner.
+ def aligned?
+ flag & FLAG_ALIGNED
+ end
+
+ # Method to test if the fragment was unmapped.
+ def unmapped?
+ flag & FLAG_UNMAPPED
+ end
+
+ # Method to test if the next fragment was unmapped.
+ def next_unmapped?
+ flag & FLAG_NEXT_UNMAPPED
+ end
+
+ # Method to test if the fragment was reverse complemented.
+ def revcomp?
+ flag & FLAG_REVCOMP
+ end
+
+ # Method to test if the next fragment was reverse complemented.
+ def next_revcomp?
+ flag & FLAG_NEXT_REVCOMP
+ end
+
+ # Method to test if the fragment was first in the template.
+ def first?
+ flag & FLAG_FIRST
+ end
+
+ # Method to test if the fragment was last in the template.
+ def last?
+ flag & FLAG_LAST
+ end
+
+ # Method to test for secondary alignment.
+ def secondary_alignment?
+ flag & FLAG_SECONDARY_ALIGNMENT
+ end
+
+ # Method to test for quality fail.
+ def quality_fail?
+ flag & FLAG_QUALITY_FAIL
+ end
+
+ # Method to test for PCR or optical duplicates.
+ def duplicates?
+ flag & FLAG_DUPLICATES
+ end
+ end
end