X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fsam.rb;h=8772e6382f20913d2fca3c9076b90475d2824564;hb=a80169a9121e8537f169cd85010d2ceae3a8d4fd;hp=d71862710baa4cb16e7d60874028882bec9fcdc0;hpb=7c18441aa7cd1049632fffc6b70256c0f1f64999;p=biopieces.git diff --git a/code_ruby/lib/maasha/sam.rb b/code_ruby/lib/maasha/sam.rb index d718627..8772e63 100644 --- a/code_ruby/lib/maasha/sam.rb +++ b/code_ruby/lib/maasha/sam.rb @@ -1,4 +1,4 @@ -# Copyright (C) 2007-2011 Martin A. Hansen. +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -63,6 +63,7 @@ class Sam < Filesys bp[:STRAND] = sam[:FLAG].revcomp? ? '-' : '+' bp[:S_ID] = sam[:RNAME] bp[:S_BEG] = sam[:POS] + bp[:S_END] = sam[:POS] + sam[:SEQ].length - 1 bp[:MAPQ] = sam[:MAPQ] bp[:CIGAR] = sam[:CIGAR].to_s @@ -75,20 +76,44 @@ class Sam < Filesys bp[:SEQ] = sam[:SEQ].seq unless sam[:SEQ].qual.nil? - bp[:SCORES] = sam[:SEQ].convert_phred2illumina!.qual + bp[:SCORES] = sam[:SEQ].qual_convert!(:base_33, :base_64).qual end - if sam.has_key? :NM and sam[:NM].to_i > 0 + if sam[:NM] and sam[:NM].to_i > 0 + bp[:NM] = sam[:NM] + bp[:MD] = sam[:MD] bp[:ALIGN] = self.align_descriptors(sam) end bp end - # Class method to convert a Biopiece record - # into a SAM entry. - def self.to_sam(bp) - "FISK" # FIXME + # Class method to create a new SAM entry from a Biopiece record. + # FIXME + def self.new_bp(bp) + qname = bp[:Q_ID] + flag = 0 + rname = bp[:S_ID] + pos = bp[:S_BEG] + mapq = bp[:MAPQ] + cigar = bp[:CIGAR] + rnext = bp[:Q_ID2] || '*' + pnext = bp[:S_BEG2] || 0 + tlen = bp[:TLEN] || 0 + seq = bp[:SEQ] + qual = bp[:SCORES] || '*' + nm = "NM:i:#{bp[:NM]}" if bp[:NM] + md = "MD:Z:#{bp[:MD]}" if bp[:MD] + + flag |= FLAG_REVCOMP if bp[:STRAND] == '+' + + if qname && flag && rname && pos && mapq && cigar && rnext && pnext && tlen && seq && qual + ary = [qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual] + ary << nm if nm + ary << md if md + + ary.join("\t") + end end # Create alignment descriptors according to the KISS @@ -239,7 +264,7 @@ class Sam < Filesys @header[:SQ][:SN] = Hash.new unless @header[:SQ][:SN].is_a? Hash - if @header[:SQ][:SN].has_key? seq_name + if @header[:SQ][:SN][seq_name] raise SamError, "Non-unique sequence name: #{seq_name}" else @header[:SQ][:SN][seq_name] = hash @@ -267,13 +292,13 @@ class Sam < Filesys end end - if hash.has_key? :FO + if hash[:FO] unless hash[:FO] =~ /^\*|[ACMGRSVTWYHKDBN]+$/ raise SamError, "Bad flow order: #{hash[:FO]}" end end - if hash.has_key? :PL + if hash[:PL] unless hash[:PL] =~ /^(CAPILLARY|LS454|ILLUMINA|SOLID|HELICOS|IONTORRENT|PACBIO)$/ raise SamError, "Bad platform: #{hash[:PL]}" end @@ -281,7 +306,7 @@ class Sam < Filesys @header[:RG][:ID] = Hash.new unless @header[:RG][:ID].is_a? Hash - if @header[:RG][:ID].has_key? id + if @header[:RG][:ID][id] raise SamError, "Non-unique read group identifier: #{id}" else @header[:RG][:ID][id] = hash @@ -311,7 +336,7 @@ class Sam < Filesys @header[:PG][:ID] = Hash.new unless @header[:PG][:ID].is_a? Hash - if @header[:PG][:ID].has_key? id + if @header[:PG][:ID][id] raise SamError, "Non-unique program record identifier: #{id}" else @header[:PG][:ID][id] = hash @@ -368,7 +393,7 @@ class Sam < Filesys entry[:RNEXT] = rnext entry[:PNEXT] = pnext entry[:TLEN] = tlen - entry[:SEQ] = (qual == '*') ? Seq.new(qname, seq) : Seq.new(qname, seq, qual) + entry[:SEQ] = (qual == '*') ? Seq.new(seq_name: qname, seq: seq) : Seq.new(seq_name: qname, seq: seq, qual: qual) entry[:QUAL] = qual # Optional fields - where some are really important! HATE HATE HATE SAM!!! @@ -376,7 +401,7 @@ class Sam < Filesys fields[11 .. -1].each do |field| tag, type, val = field.split(':') - raise SamError, "Non-unique optional tag: #{tag}" if entry.has_key? tag.to_sym + raise SamError, "Non-unique optional tag: #{tag}" if entry[tag.to_sym] # A [!-~] Printable character @@ -414,7 +439,7 @@ class Sam < Filesys raise SamError, "Bad rname: #{rname}" unless rname =~ /^(\*|[!-()+-<>-~][!-~]*)$/ unless @header.empty? or rname == '*' - unless @header[:SQ][:SN].has_key? rname.to_sym + unless @header[:SQ][:SN][rname.to_sym] raise SamError, "rname not found in header hash: #{rname}" end end @@ -437,7 +462,7 @@ class Sam < Filesys raise SamError, "Bad rnext: #{rnext}" unless rnext =~ /^(\*|=|[!-()+-<>-~][!-~]*)$/ unless @header.empty? or rnext == '*' or rnext == '=' - unless @header[:SQ][:SN].has_key? rnext.to_sym + unless @header[:SQ][:SN][rnext.to_sym] raise SamError, "rnext not found in header hash: #{rnext}" end end