-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2013 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
bp = {}
bp[:REC_TYPE] = 'SAM'
- bp[:Q_ID] = sam[:QNAME]
- bp[:STRAND] = sam[:FLAG].revcomp? ? '-' : '+'
- bp[:S_ID] = sam[:RNAME]
- bp[:S_BEG] = sam[:POS]
- bp[:MAPQ] = sam[:MAPQ]
- bp[:CIGAR] = sam[:CIGAR]
+ bp[:Q_ID] = sam[:QNAME]
+ bp[:STRAND] = sam[:FLAG].revcomp? ? '-' : '+'
+ bp[:S_ID] = sam[:RNAME]
+ bp[:S_BEG] = sam[:POS]
+ bp[:S_END] = sam[:POS] + sam[:SEQ].length - 1
+ bp[:MAPQ] = sam[:MAPQ]
+ bp[:CIGAR] = sam[:CIGAR].to_s
unless sam[:RNEXT] == '*'
bp[:Q_ID2] = sam[:RNEXT]
bp[:SCORES] = sam[:SEQ].convert_phred2illumina!.qual
end
- if sam.has_key? :NM and sam[:NM].to_i > 0
+ if sam[:NM] and sam[:NM].to_i > 0
+ bp[:NM] = sam[:NM]
+ bp[:MD] = sam[:MD]
bp[:ALIGN] = self.align_descriptors(sam)
end
bp
end
- # Create align descriptors according to the KISS format description:
+ # Class method to create a new SAM entry from a Biopiece record.
+ # FIXME
+ def self.new_bp(bp)
+ qname = bp[:Q_ID]
+ flag = 0
+ rname = bp[:S_ID]
+ pos = bp[:S_BEG]
+ mapq = bp[:MAPQ]
+ cigar = bp[:CIGAR]
+ rnext = bp[:Q_ID2] || '*'
+ pnext = bp[:S_BEG2] || 0
+ tlen = bp[:TLEN] || 0
+ seq = bp[:SEQ]
+ qual = bp[:SCORES] || '*'
+ nm = "NM:i:#{bp[:NM]}" if bp[:NM]
+ md = "MD:Z:#{bp[:MD]}" if bp[:MD]
+
+ flag |= FLAG_REVCOMP if bp[:STRAND] == '+'
+
+ if qname && flag && rname && pos && mapq && cigar && rnext && pnext && tlen && seq && qual
+ ary = [qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual]
+ ary << nm if nm
+ ary << md if md
+
+ ary.join("\t")
+ end
+ end
+
+ # Create alignment descriptors according to the KISS
+ # format description:
# http://code.google.com/p/biopieces/wiki/KissFormat
def self.align_descriptors(sam)
- offset = 0
- align = []
+ offset = 0
+ align = []
# Insertions
sam[:CIGAR].each do |len, op|
if m =~ /\d+/ # Matches
offset += m.to_i
elsif m[0] == '^' # Deletions
- 1.upto(m.size - 1).each do
- nt = sam[:SEQ].seq[offset]
-
- align << [offset, "#{nt}>-"]
-
- deletions += 1
- offset += 1
+ m.each_char do |nt|
+ unless nt == '^'
+ align << [offset, "#{nt}>-"]
+ deletions += 1
+ offset += 1
+ end
end
else # Mismatches
m.each_char do |nt|
@header[:SQ][:SN] = Hash.new unless @header[:SQ][:SN].is_a? Hash
- if @header[:SQ][:SN].has_key? seq_name
+ if @header[:SQ][:SN][seq_name]
raise SamError, "Non-unique sequence name: #{seq_name}"
else
@header[:SQ][:SN][seq_name] = hash
end
end
- if hash.has_key? :FO
+ if hash[:FO]
unless hash[:FO] =~ /^\*|[ACMGRSVTWYHKDBN]+$/
raise SamError, "Bad flow order: #{hash[:FO]}"
end
end
- if hash.has_key? :PL
+ if hash[:PL]
unless hash[:PL] =~ /^(CAPILLARY|LS454|ILLUMINA|SOLID|HELICOS|IONTORRENT|PACBIO)$/
raise SamError, "Bad platform: #{hash[:PL]}"
end
@header[:RG][:ID] = Hash.new unless @header[:RG][:ID].is_a? Hash
- if @header[:RG][:ID].has_key? id
+ if @header[:RG][:ID][id]
raise SamError, "Non-unique read group identifier: #{id}"
else
@header[:RG][:ID][id] = hash
@header[:PG][:ID] = Hash.new unless @header[:PG][:ID].is_a? Hash
- if @header[:PG][:ID].has_key? id
+ if @header[:PG][:ID][id]
raise SamError, "Non-unique program record identifier: #{id}"
else
@header[:PG][:ID][id] = hash
fields[11 .. -1].each do |field|
tag, type, val = field.split(':')
- raise SamError, "Non-unique optional tag: #{tag}" if entry.has_key? tag.to_sym
+ raise SamError, "Non-unique optional tag: #{tag}" if entry[tag.to_sym]
# A [!-~] Printable character
raise SamError, "Bad rname: #{rname}" unless rname =~ /^(\*|[!-()+-<>-~][!-~]*)$/
unless @header.empty? or rname == '*'
- unless @header[:SQ][:SN].has_key? rname.to_sym
+ unless @header[:SQ][:SN][rname.to_sym]
raise SamError, "rname not found in header hash: #{rname}"
end
end
raise SamError, "Bad rnext: #{rnext}" unless rnext =~ /^(\*|=|[!-()+-<>-~][!-~]*)$/
unless @header.empty? or rnext == '*' or rnext == '='
- unless @header[:SQ][:SN].has_key? rnext.to_sym
+ unless @header[:SQ][:SN][rnext.to_sym]
raise SamError, "rnext not found in header hash: #{rnext}"
end
end
# Method to test if template have
# multiple fragments in sequencing.
def multi?
- flag & FLAG_MULTI
+ (flag & FLAG_MULTI) == 0
end
# Method to test if each fragment
# properly aligned according to the aligner.
def aligned?
- flag & FLAG_ALIGNED
+ (flag & FLAG_ALIGNED) == 0
end
# Method to test if the fragment was unmapped.
def unmapped?
- flag & FLAG_UNMAPPED
+ (flag & FLAG_UNMAPPED) == 0
end
# Method to test if the next fragment was unmapped.
def next_unmapped?
- flag & FLAG_NEXT_UNMAPPED
+ (flag & FLAG_NEXT_UNMAPPED) == 0
end
# Method to test if the fragment was reverse complemented.
def revcomp?
- flag & FLAG_REVCOMP
+ (flag & FLAG_REVCOMP) == 0
end
# Method to test if the next fragment was reverse complemented.
def next_revcomp?
- flag & FLAG_NEXT_REVCOMP
+ (flag & FLAG_NEXT_REVCOMP) == 0
end
# Method to test if the fragment was first in the template.
def first?
- flag & FLAG_FIRST
+ (flag & FLAG_FIRST) == 0
end
# Method to test if the fragment was last in the template.
def last?
- flag & FLAG_LAST
+ (flag & FLAG_LAST) == 0
end
# Method to test for secondary alignment.
def secondary_alignment?
- flag & FLAG_SECONDARY_ALIGNMENT
+ (flag & FLAG_SECONDARY_ALIGNMENT) == 0
end
# Method to test for quality fail.
def quality_fail?
- flag & FLAG_QUALITY_FAIL
+ (flag & FLAG_QUALITY_FAIL) == 0
end
# Method to test for PCR or optical duplicates.
def duplicates?
- flag & FLAG_DUPLICATES
+ (flag & FLAG_DUPLICATES) == 0
end
end
end