# Class method to convert a SAM entry
# to a Biopiece record.
#
# sam - Hash-like SAM entry. The keys read here are :QNAME, :FLAG (must
#       respond to revcomp?), :RNAME, :POS, :MAPQ, :CIGAR, :RNEXT, :PNEXT,
#       :TLEN, :SEQ (must respond to seq, qual and convert_phred2illumina!)
#       and the optional :NM and :MD.
#
# Returns a Hash with the keys :REC_TYPE, :Q_ID, :STRAND, :S_ID, :S_BEG,
# :MAPQ, :CIGAR and :SEQ, plus :Q_ID2/:S_BEG2/:TLEN when RNEXT is not '*',
# :SCORES when the sequence has quality data, and :ALIGN when the entry
# reports an edit distance above zero and carries an MD value.
def self.to_bp(sam)
  bp = {}

  bp[:REC_TYPE] = 'SAM'
  bp[:Q_ID]     = sam[:QNAME]
  bp[:STRAND]   = sam[:FLAG].revcomp? ? '-' : '+'
  bp[:S_ID]     = sam[:RNAME]
  bp[:S_BEG]    = sam[:POS]
  bp[:MAPQ]     = sam[:MAPQ]
  bp[:CIGAR]    = sam[:CIGAR]

  # RNEXT of '*' means no mate information is given, so the three
  # mate-related keys are left out altogether.
  unless sam[:RNEXT] == '*'
    bp[:Q_ID2]  = sam[:RNEXT]
    bp[:S_BEG2] = sam[:PNEXT]
    bp[:TLEN]   = sam[:TLEN]
  end

  bp[:SEQ] = sam[:SEQ].seq

  # NOTE(review): convert_phred2illumina! is a bang method invoked directly
  # on sam[:SEQ]; presumably it rewrites the entry's scores in place, so
  # converting the same entry twice would re-encode them - confirm.
  unless sam[:SEQ].qual.nil?
    bp[:SCORES] = sam[:SEQ].convert_phred2illumina!.qual
  end

  # align_descriptors reads the MD value, so ALIGN is only built when MD is
  # present as well; an entry with NM but no MD simply gets no ALIGN instead
  # of failing on a nil MD. A missing NM yields nil.to_i == 0 and is skipped.
  if sam[:NM].to_i > 0 && sam[:MD]
    bp[:ALIGN] = align_descriptors(sam)
  end

  bp
end
+
# Create align descriptors according to the KISS format description:
# http://code.google.com/p/biopieces/wiki/KissFormat
#
# Insertions are taken from the CIGAR string and emitted as "pos:->Q".
# Deletions and mismatches are taken from the MD value and emitted as
# "pos:S>-" and "pos:S>Q", where S is the residue spelled in MD and Q is
# the residue read from sam[:SEQ].seq.
#
# sam - Hash-like SAM entry providing :CIGAR, :MD and :SEQ (must respond to
#       seq). A missing :CIGAR or :MD contributes no descriptors.
#
# Returns a String of comma separated descriptors ordered by position; the
# String is empty when there is nothing to describe.
#
# NOTE(review): positions are reported on two counters - insertions use the
# running sum of all CIGAR operation lengths, MD-derived descriptors use the
# MD offset (matches + mismatches + deleted residues). Whether KISS expects
# one common scale should be confirmed against the format description.
def self.align_descriptors(sam)
  seq   = sam[:SEQ].seq
  align = []

  offset      = 0   # Sum of all CIGAR lengths seen so far (insertion position).
  query_pos   = 0   # Index into seq of the next unconsumed query residue.
  md_to_query = []  # Query index per MD position; nil for deleted residues.

  # Insertions. Only M, I, S, = and X consume residues of the query, so the
  # query index is tracked separately from the reported position.
  sam[:CIGAR].to_s.scan(/([0-9]+)([MIDNSHPX=])/).each do |len, op|
    len = len.to_i

    case op
    when 'M', '=', 'X'
      len.times { |i| md_to_query << query_pos + i }
      query_pos += len
    when 'I'
      len.times { |i| align << [offset + i, "->#{seq[query_pos + i]}"] }
      query_pos += len
    when 'S'
      query_pos += len
    when 'D'
      # Deleted residues occupy MD positions but have no query residue.
      md_to_query.concat([nil] * len)
    end

    offset += len
  end

  offset    = 0
  deletions = 0

  sam[:MD].to_s.scan(/\d+|\^[A-Z]+|[A-Z]+/).each do |m|
    if m =~ /\d+/                  # Matches
      offset += m.to_i
    elsif m.start_with?('^')       # Deletions
      m[1..-1].each_char do |nt|
        align << [offset, "#{nt}>-"]

        deletions += 1
        offset    += 1
      end
    else                           # Mismatches
      m.each_char do |nt|
        # Use the CIGAR-derived query index so preceding insertions and soft
        # clips are accounted for; fall back to the deletion-corrected MD
        # offset when the CIGAR does not cover this position (e.g. '*').
        query_idx = md_to_query[offset] || (offset - deletions)

        align << [offset, "#{nt}>#{seq[query_idx]}"]

        offset += 1
      end
    end
  end

  align.sort_by(&:first).map { |pos, desc| "#{pos}:#{desc}" }.join(',')
end
+