From a77640cb6c17bb33947792cfa0b8074dd13df9f7 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 29 Aug 2011 13:10:51 +0000 Subject: [PATCH] added cigar.rb and test_cigar.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1502 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/cigar.rb | 126 ++++++++++++++++++++++++++++ code_ruby/lib/maasha/sam.rb | 4 +- code_ruby/test/maasha/test_cigar.rb | 77 +++++++++++++++++ 3 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 code_ruby/lib/maasha/cigar.rb create mode 100755 code_ruby/test/maasha/test_cigar.rb diff --git a/code_ruby/lib/maasha/cigar.rb b/code_ruby/lib/maasha/cigar.rb new file mode 100644 index 0000000..765a4fa --- /dev/null +++ b/code_ruby/lib/maasha/cigar.rb @@ -0,0 +1,126 @@ +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Methods to handle extended CIGAR format used in the +# SAM format version v1.4-r962 - April 17, 2011 + +# http://samtools.sourceforge.net/SAM1.pdf + +require 'pp' + +# Error class for all exceptions to do with CIGAR. +class CigarError < StandardError; end + +# Class to parse and write SAM files. +class Cigar + attr_accessor :cigar + + def initialize(cigar) + @cigar = cigar + + check_cigar + end + + def each + cigar.scan(/(\d+)([MIDNSHPX=])/).each do |len, op| + yield [len.to_i, op] + end + end + + def length + total = 0 + + self.each do |len, op| + total += len unless op == 'I' + end + + total + end + + def matches + total = 0 + + self.each do |len, op| + total += len if op == 'M' + end + + total + end + + def insertions + total = 0 + + self.each do |len, op| + total += len if op == 'I' + end + + total + end + + def deletions + total = 0 + + self.each do |len, op| + total += len if op == 'D' + end + + total + end + + def clip_hard + total = 0 + + self.each do |len, op| + total += len if op == 'H' + end + + total + end + + def clip_soft + total = 0 + + self.each do |len, op| + total += len if op == 'S' + end + + total + end + + private + + def check_cigar + unless cigar =~ /^(\*|([0-9]+[MIDNSHPX=])+)$/ + raise CigarError, "Bad cigar format: #{cigar}" + end + + if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").match('H') + raise CigarError, "Bad cigar with internal H: #{cigar}" + end + + if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").gsub(/^[0-9]+S|[0-9]+S$/, "").match('S') + raise CigarError, "Bad cigar with internal S: #{cigar}" + end + end +end diff --git a/code_ruby/lib/maasha/sam.rb b/code_ruby/lib/maasha/sam.rb index be4e9f4..79aa5ad 100644 --- a/code_ruby/lib/maasha/sam.rb +++ b/code_ruby/lib/maasha/sam.rb @@ -119,7 +119,7 @@ class Sam < Filesys deletions += 1 offset += 1 end - else # Mismatches + else # Mismatches m.each_char do |nt| nt2 = sam[:SEQ].seq[offset - deletions] @@ -130,7 +130,7 @@ class Sam < Filesys end end - align.sort_by { |a| a.first }.map { |k,v| "#{k}:#{v}" }.join(",") + align.sort_by { |a| a.first }.map { |k, v| "#{k}:#{v}" }.join(",") end # Method to initialize a Sam object. diff --git a/code_ruby/test/maasha/test_cigar.rb b/code_ruby/test/maasha/test_cigar.rb new file mode 100755 index 0000000..d4e014f --- /dev/null +++ b/code_ruby/test/maasha/test_cigar.rb @@ -0,0 +1,77 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'test/unit' +require 'maasha/cigar' +require 'pp' + +class CigarTest < Test::Unit::TestCase + def test_Cigar_new_with_bad_cigar_raises + assert_raise(CigarError) { Cigar.new("@") } + assert_raise(CigarError) { Cigar.new("1M1H1M") } + assert_raise(CigarError) { Cigar.new("1M1S1M") } + end + + def test_Cigar_new_with_ok_cigar_dont_raise + assert_nothing_raised { Cigar.new("1M") } + assert_nothing_raised { Cigar.new("1H1M1H") } + assert_nothing_raised { Cigar.new("1S1M1S") } + assert_nothing_raised { Cigar.new("1H1S1M1S1H") } + end + + def test_Cigar_each_returns_correctly + cigar = Cigar.new("10M") + + cigar.each { |len, op| + assert_equal(10, len) + assert_equal("M", op) + } + end + + def test_Cigar_length_returns_correctly + assert_equal(6, Cigar.new("1M2S3H").length) + end + + def test_Cigar_matches_returns_correctly + assert_equal(1, Cigar.new("1M2S3H").matches) + end + + def test_Cigar_insertions_returns_correctly + assert_equal(1, Cigar.new("10M1I").insertions) + end + + def test_Cigar_deletions_returns_correctly + assert_equal(2, Cigar.new("10M2D").deletions) + end + + def test_Cigar_clip_hard_returns_correctly + assert_equal(3, Cigar.new("10M3H").clip_hard) + end + + def test_Cigar_clip_soft_returns_correctly + assert_equal(4, Cigar.new("10M4S").clip_soft) + end +end -- 2.39.2