--- /dev/null
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Methods to handle extended CIGAR format used in the
+# SAM format version v1.4-r962 - April 17, 2011
+
+# http://samtools.sourceforge.net/SAM1.pdf
+
+require 'pp'
+
+# Error class for all exceptions to do with CIGAR.
+class CigarError < StandardError; end
+
+# Class to parse and write SAM files.
+class Cigar
+ attr_accessor :cigar
+
+ def initialize(cigar)
+ @cigar = cigar
+
+ check_cigar
+ end
+
+ def each
+ cigar.scan(/(\d+)([MIDNSHPX=])/).each do |len, op|
+ yield [len.to_i, op]
+ end
+ end
+
+ def length
+ total = 0
+
+ self.each do |len, op|
+ total += len unless op == 'I'
+ end
+
+ total
+ end
+
+ def matches
+ total = 0
+
+ self.each do |len, op|
+ total += len if op == 'M'
+ end
+
+ total
+ end
+
+ def insertions
+ total = 0
+
+ self.each do |len, op|
+ total += len if op == 'I'
+ end
+
+ total
+ end
+
+ def deletions
+ total = 0
+
+ self.each do |len, op|
+ total += len if op == 'D'
+ end
+
+ total
+ end
+
+ def clip_hard
+ total = 0
+
+ self.each do |len, op|
+ total += len if op == 'H'
+ end
+
+ total
+ end
+
+ def clip_soft
+ total = 0
+
+ self.each do |len, op|
+ total += len if op == 'S'
+ end
+
+ total
+ end
+
+ private
+
+ def check_cigar
+ unless cigar =~ /^(\*|([0-9]+[MIDNSHPX=])+)$/
+ raise CigarError, "Bad cigar format: #{cigar}"
+ end
+
+ if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").match('H')
+ raise CigarError, "Bad cigar with internal H: #{cigar}"
+ end
+
+ if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").gsub(/^[0-9]+S|[0-9]+S$/, "").match('S')
+ raise CigarError, "Bad cigar with internal S: #{cigar}"
+ end
+ end
+end
deletions += 1
offset += 1
end
- else # Mismatches
+ else # Mismatches
m.each_char do |nt|
nt2 = sam[:SEQ].seq[offset - deletions]
end
end
- align.sort_by { |a| a.first }.map { |k,v| "#{k}:#{v}" }.join(",")
+ align.sort_by { |a| a.first }.map { |k, v| "#{k}:#{v}" }.join(",")
end
# Method to initialize a Sam object.
--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'test/unit'
+require 'maasha/cigar'
+require 'pp'
+
+class CigarTest < Test::Unit::TestCase
+ def test_Cigar_new_with_bad_cigar_raises
+ assert_raise(CigarError) { Cigar.new("@") }
+ assert_raise(CigarError) { Cigar.new("1M1H1M") }
+ assert_raise(CigarError) { Cigar.new("1M1S1M") }
+ end
+
+ def test_Cigar_new_with_ok_cigar_dont_raise
+ assert_nothing_raised { Cigar.new("1M") }
+ assert_nothing_raised { Cigar.new("1H1M1H") }
+ assert_nothing_raised { Cigar.new("1S1M1S") }
+ assert_nothing_raised { Cigar.new("1H1S1M1S1H") }
+ end
+
+ def test_Cigar_each_returns_correctly
+ cigar = Cigar.new("10M")
+
+ cigar.each { |len, op|
+ assert_equal(10, len)
+ assert_equal("M", op)
+ }
+ end
+
+ def test_Cigar_length_returns_correctly
+ assert_equal(6, Cigar.new("1M2S3H").length)
+ end
+
+ def test_Cigar_matches_returns_correctly
+ assert_equal(1, Cigar.new("1M2S3H").matches)
+ end
+
+ def test_Cigar_insertions_returns_correctly
+ assert_equal(1, Cigar.new("10M1I").insertions)
+ end
+
+ def test_Cigar_deletions_returns_correctly
+ assert_equal(2, Cigar.new("10M2D").deletions)
+ end
+
+ def test_Cigar_clip_hard_returns_correctly
+ assert_equal(3, Cigar.new("10M3H").clip_hard)
+ end
+
+ def test_Cigar_clip_soft_returns_correctly
+ assert_equal(4, Cigar.new("10M4S").clip_soft)
+ end
+end