1 # Copyright (C) 2007-2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 require 'maasha/seq/backtrack'
30 # Error class for all exceptions to do with Trim.
31 class TrimError < StandardError; end
33 # Module containing methods for end trimming sequences with suboptimal quality
36 # Method to progressively trim a Seq object sequence from the right end until
37 # a run of min_len residues with quality scores above min_qual is encountered.
38 def quality_trim_right(min_qual, min_len = 1)
39 check_trim_args(min_qual)
41 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
46 # Method to progressively trim a Seq object sequence from the right end until
47 # a run of min_len residues with quality scores above min_qual is encountered.
48 def quality_trim_right!(min_qual, min_len = 1)
49 check_trim_args(min_qual)
51 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
53 self.seq = self.seq[0, pos]
54 self.qual = self.qual[0, pos] if self.qual
59 # Method to progressively trim a Seq object sequence from the left end until
60 # a run of min_len residues with quality scores above min_qual is encountered.
61 def quality_trim_left(min_qual, min_len = 1)
62 check_trim_args(min_qual)
64 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
69 # Method to progressively trim a Seq object sequence from the left end until
70 # a run of min_len residues with quality scores above min_qual is encountered.
71 def quality_trim_left!(min_qual, min_len = 1)
72 check_trim_args(min_qual)
74 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
76 self.seq = self.seq[pos .. -1]
77 self.qual = self.qual[pos .. -1] if self.qual
82 # Method to progressively trim a Seq object sequence from both ends until a
83 # run of min_len residues with quality scores above min_qual is encountered.
84 def quality_trim(min_qual, min_len = 1)
85 check_trim_args(min_qual)
87 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
88 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
90 pos_left = pos_right if pos_left > pos_right
92 self[pos_left ... pos_right]
95 # Method to progressively trim a Seq object sequence from both ends until a
96 # run of min_len residues with quality scores above min_qual is encountered.
97 def quality_trim!(min_qual, min_len = 1)
98 check_trim_args(min_qual)
100 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
101 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
103 pos_left = pos_right if pos_left > pos_right
105 self.seq = self.seq[pos_left ... pos_right]
106 self.qual = self.qual[pos_left ... pos_right] if self.qual
111 # Method to locate a pattern in a sequence and trim all sequence to the left
112 # including the pattern.
113 def patmatch_trim_left!(pattern, options = {})
114 match = self.patmatch(pattern, options)
119 self.seq = self.seq[match.pos + match.length .. stop]
120 self.qual = self.qual[match.pos + match.length .. stop] if self.qual
128 # Method to locate a pattern in a sequence and trim all sequence to the right
129 # including the pattern.
130 def patmatch_trim_right!(pattern, options = {})
131 match = self.patmatch(pattern, options)
134 self.seq = self.seq[0 ... match.pos]
135 self.qual = self.qual[0 ... match.pos] if self.qual
145 # Method to check the arguments for trimming and raise on bad sequence, qualities,
147 def check_trim_args(min_qual)
148 raise TrimError, "no sequence" if self.seq.nil?
149 raise TrimError, "no quality score" if self.qual.nil?
150 unless (Seq::SCORE_MIN .. Seq::SCORE_MAX).include? min_qual
151 raise TrimError, "minimum quality value: #{min_qual} out of range #{Seq::SCORE_MIN} .. #{Seq::SCORE_MAX}"
155 # Inline C functions for speed below.
157 # Method for locating the right trim position and return this.
159 VALUE trim_right_pos_c(
160 VALUE _qual, // quality score string
161 VALUE _len, // length of quality score string
162 VALUE _min_qual, // minimum quality score
163 VALUE _min_len, // minimum quality length
164 VALUE _score_base // score base
167 unsigned char *qual = StringValuePtr(_qual);
168 unsigned int len = FIX2UINT(_len);
169 unsigned int min_qual = FIX2UINT(_min_qual);
170 unsigned int min_len = FIX2UINT(_min_len);
171 unsigned int score_base = FIX2UINT(_score_base);
180 while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base >= min_qual))
184 return UINT2NUM(len - i);
195 # Method for locating the left trim position and return this.
197 VALUE trim_left_pos_c(
198 VALUE _qual, // quality score string
199 VALUE _len, // length of quality score string
200 VALUE _min_qual, // minimum quality score
201 VALUE _min_len, // minimum quality length
202 VALUE _score_base // score base
205 unsigned char *qual = StringValuePtr(_qual);
206 unsigned int len = FIX2UINT(_len);
207 unsigned int min_qual = FIX2UINT(_min_qual);
208 unsigned int min_len = FIX2UINT(_min_len);
209 unsigned int score_base = FIX2UINT(_score_base);
218 while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base >= min_qual))