1 # Copyright (C) 2007-2011 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Methods to handle extended CIGAR format used in the
26 # SAM format version v1.4-r962 - April 17, 2011
28 # http://samtools.sourceforge.net/SAM1.pdf
# Error class for all exceptions to do with CIGAR.
#
# Derives from StandardError, so a bare `rescue` catches it as well as an
# explicit `rescue CigarError`.
class CigarError < StandardError
end
33 # Class to manipulate CIGAR strings.
37 # Method to initialize a CIGAR string.
44 # Method to convert the CIGAR string to
50 # Method to iterate over the length and operators
53 cigar.scan(/(\d+)([MIDNSHPX=])/).each do |len, op|
58 # Method to return the total length of
59 # the residues described in the CIGAR string.
60 # This length should match the length of the
65 self.each do |len, op|
66 total += len unless op == 'I'
72 # Method to return the number of matched
73 # residues in the CIGAR string.
77 self.each do |len, op|
78 total += len if op == 'M'
84 # Method to return the number of inserted
85 # residues in the CIGAR string.
89 self.each do |len, op|
90 total += len if op == 'I'
96 # Method to return the number of deleted
97 # residues in the CIGAR string.
101 self.each do |len, op|
102 total += len if op == 'D'
108 # Method to return the number of hard clipped
109 # residues in the CIGAR string.
113 self.each do |len, op|
114 total += len if op == 'H'
120 # Method to return the number of soft clipped
121 # residues in the CIGAR string.
125 self.each do |len, op|
126 total += len if op == 'S'
134 # Method to check that the CIGAR string is formatted
135 # correctly, including that hard clipping and soft clipping
136 # are not located internally.
138 unless cigar =~ /^(\*|([0-9]+[MIDNSHPX=])+)$/
139 raise CigarError, "Bad cigar format: #{cigar}"
142 if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").match('H')
143 raise CigarError, "Bad cigar with internal H: #{cigar}"
146 if cigar.gsub(/^[0-9]+H|[0-9]+H$/, "").gsub(/^[0-9]+S|[0-9]+S$/, "").match('S')
147 raise CigarError, "Bad cigar with internal S: #{cigar}"