require 'pp'
require 'maasha/filesys'
require 'maasha/seq'
+require 'maasha/cigar'
# Matches a header line: '@' plus a two-letter record type, followed by
# one or more tab-separated TAG:VALUE fields of printable ASCII.
REGEX_HEADER = /^@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/

# Matches a comment header line: '@CO', a tab, then arbitrary text.
REGEX_COMMENT = /^@CO\t.*/
bp = {}
bp[:REC_TYPE] = 'SAM'
- bp[:Q_ID] = sam[:QNAME]
- bp[:STRAND] = sam[:FLAG].revcomp? ? '-' : '+'
- bp[:S_ID] = sam[:RNAME]
- bp[:S_BEG] = sam[:POS]
- bp[:MAPQ] = sam[:MAPQ]
- bp[:CIGAR] = sam[:CIGAR]
+ bp[:Q_ID] = sam[:QNAME]
+ bp[:STRAND] = sam[:FLAG].revcomp? ? '-' : '+'
+ bp[:S_ID] = sam[:RNAME]
+ bp[:S_BEG] = sam[:POS]
+ bp[:S_END] = sam[:POS] + sam[:SEQ].length - 1
+ bp[:MAPQ] = sam[:MAPQ]
+ bp[:CIGAR] = sam[:CIGAR].to_s
unless sam[:RNEXT] == '*'
bp[:Q_ID2] = sam[:RNEXT]
bp
end
- # Create align descriptors according to the KISS format description:
+ # Class method to convert a Biopiece record
+ # into a SAM entry.
+ def self.to_sam(bp)
+ "FISK" # FIXME: placeholder return value; bp is ignored and no SAM entry is built yet
+ end
+
+ # Create alignment descriptors according to the KISS
+ # format description:
# http://code.google.com/p/biopieces/wiki/KissFormat
def self.align_descriptors(sam)
- offset = 0
- insertions = 0
- align = []
+ offset = 0
+ align = []
# Insertions
- sam[:CIGAR].scan(/([0-9]+)([MIDNSHPX=])/).each do |len, op|
- len = len.to_i
-
+ sam[:CIGAR].each do |len, op|
if op == 'I'
(0 ... len).each_with_index do |i|
nt = sam[:SEQ].seq[offset + i]
if m =~ /\d+/ # Matches
offset += m.to_i
elsif m[0] == '^' # Deletions
- m[1 .. -1].each_char do |nt|
- align << [offset, "#{nt}>-"]
-
- deletions += 1
- offset += 1
+ m.each_char do |nt|
+ unless nt == '^'
+ align << [offset, "#{nt}>-"]
+ deletions += 1
+ offset += 1
+ end
end
- else # Mismatches
+ else # Mismatches
m.each_char do |nt|
nt2 = sam[:SEQ].seq[offset - deletions]
end
end
- align.sort_by { |a| a.first }.map { |k,v| "#{k}:#{v}" }.join(",")
+ align.sort_by { |a| a.first }.map { |k, v| "#{k}:#{v}" }.join(",")
end
# Method to initialize a Sam object.
check_rname(rname)
check_pos(pos)
check_mapq(mapq)
- check_cigar(cigar, seq)
check_rnext(rnext)
check_pnext(pnext)
check_tlen(tlen)
entry[:RNAME] = rname
entry[:POS] = pos
entry[:MAPQ] = mapq
- entry[:CIGAR] = cigar
+ entry[:CIGAR] = Cigar.new(cigar)
entry[:RNEXT] = rnext
entry[:PNEXT] = pnext
entry[:TLEN] = tlen
raise SamError, "Bad mapq: #{mapq}" unless (0 .. 2**8 - 1).include? mapq
end
- # Method to check cigar string.
- def check_cigar(cigar, seq)
- raise SamError, "Bad cigar: #{cigar}" unless cigar =~ /^(\*|([0-9]+[MIDNSHPX=])+)$/
-
- unless cigar == '*'
- check_cigar_hard_clip(cigar)
- check_cigar_soft_clip(cigar)
- check_cigar_seq_len(cigar, seq) unless seq == '*'
- end
- end
-
- # Method to check cigar hard clipping only at ends.
- def check_cigar_hard_clip(cigar)
- if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").match('H')
- raise SamError, "Bad cigar with internal H: #{cigar}"
- end
- end
-
- # Method to check cigar soft clipping only at ends or H.
- def check_cigar_soft_clip(cigar)
- if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").gsub(/^[0-9]+S|[0-9]+S$/, "").match('S')
- raise SamError, "Bad cigar with internal S: #{cigar}"
- end
- end
-
- # Method to check cigar length matches sequence length.
- def check_cigar_seq_len(cigar, seq)
- cigar_len = 0
-
- cigar.scan(/([0-9]+)([MIDNSHPX=])/).each do |len, op|
- cigar_len += len.to_i if op =~ /[MISX=]/
- end
-
- if cigar_len != seq.length
- raise SamError, "cigar and sequence length mismatch: #{cigar_len} != #{seq.length}"
- end
- end
-
# Method to check if rnext, when not '*' or '='
# and @SQ header lines are present, is located
# in the header hash.
# Method to test if the template has
# multiple fragments in sequencing.
def multi?
  # Returns true when the FLAG_MULTI bit is set in flag, false otherwise.
  # The explicit comparison is required because Integer 0 is truthy in
  # Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_MULTI) != 0
end
# Method to test if each fragment is
# properly aligned according to the aligner.
def aligned?
  # Returns true when the FLAG_ALIGNED bit is set in flag, false otherwise.
  # The explicit comparison is required because Integer 0 is truthy in
  # Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_ALIGNED) != 0
end
# Method to test if the fragment was unmapped.
def unmapped?
  # Returns true when the FLAG_UNMAPPED bit is set in flag, false otherwise.
  # The explicit comparison is required because Integer 0 is truthy in
  # Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_UNMAPPED) != 0
end
# Method to test if the next fragment was unmapped.
def next_unmapped?
  # Returns true when the FLAG_NEXT_UNMAPPED bit is set in flag, false
  # otherwise. The explicit comparison is required because Integer 0 is
  # truthy in Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_NEXT_UNMAPPED) != 0
end
# Method to test if the fragment was reverse complemented.
def revcomp?
  # Returns true when the FLAG_REVCOMP bit is set in flag, false otherwise.
  # The explicit comparison is required because Integer 0 is truthy in
  # Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_REVCOMP) != 0
end
# Method to test if the next fragment was reverse complemented.
def next_revcomp?
  # Returns true when the FLAG_NEXT_REVCOMP bit is set in flag, false
  # otherwise. The explicit comparison is required because Integer 0 is
  # truthy in Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_NEXT_REVCOMP) != 0
end
# Method to test if the fragment was first in the template.
def first?
  # Returns true when the FLAG_FIRST bit is set in flag, false otherwise.
  # The explicit comparison is required because Integer 0 is truthy in
  # Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_FIRST) != 0
end
# Method to test if the fragment was last in the template.
def last?
  # Returns true when the FLAG_LAST bit is set in flag, false otherwise.
  # The explicit comparison is required because Integer 0 is truthy in
  # Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_LAST) != 0
end
# Method to test for secondary alignment.
def secondary_alignment?
  # Returns true when the FLAG_SECONDARY_ALIGNMENT bit is set in flag,
  # false otherwise. The explicit comparison is required because Integer 0
  # is truthy in Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_SECONDARY_ALIGNMENT) != 0
end
# Method to test for quality fail.
def quality_fail?
  # Returns true when the FLAG_QUALITY_FAIL bit is set in flag, false
  # otherwise. The explicit comparison is required because Integer 0 is
  # truthy in Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_QUALITY_FAIL) != 0
end
# Method to test for PCR or optical duplicates.
def duplicates?
  # Returns true when the FLAG_DUPLICATES bit is set in flag, false
  # otherwise. The explicit comparison is required because Integer 0 is
  # truthy in Ruby; the bit is present when the masked value is non-zero.
  (flag & FLAG_DUPLICATES) != 0
end
end
end