1 # Copyright (C) 2007-2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Error class for all exceptions to do with Trim.
28 class TrimError < StandardError; end
30 # Module containing methods for end trimming sequences with suboptimal quality
33 # Method to progressively trim a Seq object sequence from the right end until
34 # a run of min_len residues with quality scores above min_qual is encountered.
35 def quality_trim_right(min_qual, min_len = 1)
36 check_trim_args(min_qual)
38 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
43 # Method to progressively trim a Seq object sequence from the right end until
44 # a run of min_len residues with quality scores above min_qual is encountered.
45 def quality_trim_right!(min_qual, min_len = 1)
46 check_trim_args(min_qual)
48 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
53 # Method to progressively trim a Seq object sequence from the left end until
54 # a run of min_len residues with quality scores above min_qual is encountered.
55 def quality_trim_left(min_qual, min_len = 1)
56 check_trim_args(min_qual)
58 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
63 # Method to progressively trim a Seq object sequence from the left end until
64 # a run of min_len residues with quality scores above min_qual is encountered.
65 def quality_trim_left!(min_qual, min_len = 1)
66 check_trim_args(min_qual)
68 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
73 # Method to progressively trim a Seq object sequence from both ends until a
74 # run of min_len residues with quality scores above min_qual is encountered.
75 def quality_trim(min_qual, min_len = 1)
76 check_trim_args(min_qual)
78 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
79 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
81 pos_left = pos_right if pos_left > pos_right
83 self.subseq(pos_left, pos_right - pos_left)
86 # Method to progressively trim a Seq object sequence from both ends until a
87 # run of min_len residues with quality scores above min_qual is encountered.
88 def quality_trim!(min_qual, min_len = 1)
89 check_trim_args(min_qual)
91 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
92 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
94 pos_left = pos_right if pos_left > pos_right
96 self.subseq!(pos_left, pos_right - pos_left)
101 # Method to check the arguments for trimming and raise on bad sequence, qualities,
103 def check_trim_args(min_qual)
104 raise TrimError, "no sequence" if self.seq.nil?
105 raise TrimError, "no quality score" if self.qual.nil?
106 unless (Seq::SCORE_MIN .. Seq::SCORE_MAX).include? min_qual
107 raise TrimError, "minimum quality value: #{min_qual} out of range #{Seq::SCORE_MIN} .. #{Seq::SCORE_MAX}"
111 # Inline C functions for speed below.
113 # Method for locating the right trim position and return this.
115 VALUE trim_right_pos_c(
116 VALUE _qual, // quality score string
117 VALUE _len, // length of quality score string
118 VALUE _min_qual, // minimum quality score
119 VALUE _min_len, // minimum quality length
120 VALUE _score_base // score base
123 unsigned char *qual = StringValuePtr(_qual);
124 unsigned int len = FIX2UINT(_len);
125 unsigned int min_qual = FIX2UINT(_min_qual);
126 unsigned int min_len = FIX2UINT(_min_len);
127 unsigned int score_base = FIX2UINT(_score_base);
136 while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base >= min_qual))
140 return UINT2NUM(len - i);
151 # Method for locating the left trim position and return this.
153 VALUE trim_left_pos_c(
154 VALUE _qual, // quality score string
155 VALUE _len, // length of quality score string
156 VALUE _min_qual, // minimum quality score
157 VALUE _min_len, // minimum quality length
158 VALUE _score_base // score base
161 unsigned char *qual = StringValuePtr(_qual);
162 unsigned int len = FIX2UINT(_len);
163 unsigned int min_qual = FIX2UINT(_min_qual);
164 unsigned int min_len = FIX2UINT(_min_len);
165 unsigned int score_base = FIX2UINT(_score_base);
174 while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base >= min_qual))