2 # This program is free software; you can redistribute it and/or
3 # modify it under the terms of the GNU General Public License
4 # as published by the Free Software Foundation; either version 2
5 # of the License, or (at your option) any later version.
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
12 # You should have received a copy of the GNU General Public License
13 # along with this program; if not, write to the Free Software
14 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 # http://www.gnu.org/copyleft/gpl.html
18 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
20 # This software is part of the Biopieces framework (www.biopieces.org).
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 class TrimError < StandardError; end
28 # Module containing methods for end trimming sequences with suboptimal quality
31 # Method to progressively trim a Seq object sequence from the right end until
32 # a run of min_len residues with quality scores above min_qual is encountered.
33 def quality_trim_right(min_qual, min_len = 1)
34 check_trim_args(min_qual)
36 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
41 # Method to progressively trim a Seq object sequence from the right end until
42 # a run of min_len residues with quality scores above min_qual is encountered.
43 def quality_trim_right!(min_qual, min_len = 1)
44 check_trim_args(min_qual)
46 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
51 # Method to progressively trim a Seq object sequence from the left end until
52 # a run of min_len residues with quality scores above min_qual is encountered.
53 def quality_trim_left(min_qual, min_len = 1)
54 check_trim_args(min_qual)
56 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
61 # Method to progressively trim a Seq object sequence from the left end until
62 # a run of min_len residues with quality scores above min_qual is encountered.
63 def quality_trim_left!(min_qual, min_len = 1)
64 check_trim_args(min_qual)
66 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
71 # Method to progressively trim a Seq object sequence from both ends until a
72 # run of min_len residues with quality scores above min_qual is encountered.
73 def quality_trim(min_qual, min_len = 1)
74 check_trim_args(min_qual)
76 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
77 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
79 pos_left = pos_right if pos_left > pos_right
81 self.subseq(pos_left, pos_right - pos_left)
84 # Method to progressively trim a Seq object sequence from both ends until a
85 # run of min_len residues with quality scores above min_qual is encountered.
86 def quality_trim!(min_qual, min_len = 1)
87 check_trim_args(min_qual)
89 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
90 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE)
92 pos_left = pos_right if pos_left > pos_right
94 self.subseq!(pos_left, pos_right - pos_left)
99 # Method to check the arguments for trimming and raise on bad sequence, qualities,
101 def check_trim_args(min_qual)
102 raise TrimError, "no sequence" if self.seq.nil?
103 raise TrimError, "no quality score" if self.qual.nil?
104 unless (SCORE_MIN .. SCORE_MAX).include? min_qual
105 raise TrimError, "minimum quality value: #{min_qual} out of range #{SCORE_MIN} .. #{SCORE_MAX}"
109 # Inline C functions for speed below.
111 # Method for locating the right trim position and return this.
113 VALUE trim_right_pos_c(
114 VALUE _qual, // quality score string
115 VALUE _len, // length of quality score string
116 VALUE _min_qual, // minimum quality score
117 VALUE _min_len, // minimum quality length
118 VALUE _score_base // score base
121 unsigned char *qual = StringValuePtr(_qual);
122 unsigned int len = FIX2UINT(_len);
123 unsigned int min_qual = FIX2UINT(_min_qual);
124 unsigned int min_len = FIX2UINT(_min_len);
125 unsigned int score_base = FIX2UINT(_score_base);
134 while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base > min_qual))
138 return UINT2NUM(len - i);
149 # Method for locating the left trim position and return this.
151 VALUE trim_left_pos_c(
152 VALUE _qual, // quality score string
153 VALUE _len, // length of quality score string
154 VALUE _min_qual, // minimum quality score
155 VALUE _min_len, // minimum quality length
156 VALUE _score_base // score base
159 unsigned char *qual = StringValuePtr(_qual);
160 unsigned int len = FIX2UINT(_len);
161 unsigned int min_qual = FIX2UINT(_min_qual);
162 unsigned int min_len = FIX2UINT(_min_len);
163 unsigned int score_base = FIX2UINT(_score_base);
172 while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base > min_qual))