2 # This program is free software; you can redistribute it and/or
3 # modify it under the terms of the GNU General Public License
4 # as published by the Free Software Foundation; either version 2
5 # of the License, or (at your option) any later version.
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
12 # You should have received a copy of the GNU General Public License
13 # along with this program; if not, write to the Free Software
14 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 # http://www.gnu.org/copyleft/gpl.html
18 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
20 # This software is part of the Biopieces framework (www.biopieces.org).
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Exception class used for every error raised in connection with Trim.
class TrimError < StandardError
end
29 # Module containing methods for end trimming sequences with suboptimal quality
# Method to progressively trim a Seq object sequence from the right end until
# a run of min_len residues with quality scores at or above min_qual is encountered.
34 def quality_trim_right(min_qual, min_len = 1)
35 check_trim_args(min_qual)
37 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
# Method to progressively trim a Seq object sequence from the right end until
# a run of min_len residues with quality scores at or above min_qual is encountered.
44 def quality_trim_right!(min_qual, min_len = 1)
45 check_trim_args(min_qual)
47 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
# Method to progressively trim a Seq object sequence from the left end until
# a run of min_len residues with quality scores at or above min_qual is encountered.
54 def quality_trim_left(min_qual, min_len = 1)
55 check_trim_args(min_qual)
57 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
# Method to progressively trim a Seq object sequence from the left end until
# a run of min_len residues with quality scores at or above min_qual is encountered.
64 def quality_trim_left!(min_qual, min_len = 1)
65 check_trim_args(min_qual)
67 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
# Method to progressively trim a Seq object sequence from both ends until a
# run of min_len residues with quality scores at or above min_qual is encountered.
74 def quality_trim(min_qual, min_len = 1)
75 check_trim_args(min_qual)
77 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
78 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
80 pos_left = pos_right if pos_left > pos_right
82 self.subseq(pos_left, pos_right - pos_left)
# Method to progressively trim a Seq object sequence from both ends until a
# run of min_len residues with quality scores at or above min_qual is encountered.
# This variant applies the trim through subseq! rather than subseq.
87 def quality_trim!(min_qual, min_len = 1)
88 check_trim_args(min_qual)
90 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
91 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
93 pos_left = pos_right if pos_left > pos_right
95 self.subseq!(pos_left, pos_right - pos_left)
# Method to check the arguments for trimming and raise a TrimError on a missing
# sequence, missing quality scores, or a min_qual outside Seq::SCORE_MIN .. Seq::SCORE_MAX.
102 def check_trim_args(min_qual)
103 raise TrimError, "no sequence" if self.seq.nil?
104 raise TrimError, "no quality score" if self.qual.nil?
105 unless (Seq::SCORE_MIN .. Seq::SCORE_MAX).include? min_qual
106 raise TrimError, "minimum quality value: #{min_qual} out of range #{Seq::SCORE_MIN} .. #{Seq::SCORE_MAX}"
110 # Inline C functions for speed below.
# Method for locating the right trim position and returning it.
114 VALUE trim_right_pos_c(
115 VALUE _qual, // quality score string
116 VALUE _len, // length of quality score string
117 VALUE _min_qual, // minimum quality score
118 VALUE _min_len, // minimum quality length
119 VALUE _score_base // score base
122 unsigned char *qual = StringValuePtr(_qual);
123 unsigned int len = FIX2UINT(_len);
124 unsigned int min_qual = FIX2UINT(_min_qual);
125 unsigned int min_len = FIX2UINT(_min_len);
126 unsigned int score_base = FIX2UINT(_score_base);
135 while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base >= min_qual))
139 return UINT2NUM(len - i);
# Method for locating the left trim position and returning it.
152 VALUE trim_left_pos_c(
153 VALUE _qual, // quality score string
154 VALUE _len, // length of quality score string
155 VALUE _min_qual, // minimum quality score
156 VALUE _min_len, // minimum quality length
157 VALUE _score_base // score base
160 unsigned char *qual = StringValuePtr(_qual);
161 unsigned int len = FIX2UINT(_len);
162 unsigned int min_qual = FIX2UINT(_min_qual);
163 unsigned int min_len = FIX2UINT(_min_len);
164 unsigned int score_base = FIX2UINT(_score_base);
173 while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base >= min_qual))