1 # Copyright (C) 2007-2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 require 'maasha/seq/backtrack'
30 # Error class for all exceptions to do with Trim.
31 class TrimError < StandardError; end
33 # Module containing methods for end trimming sequences with suboptimal quality
36 # Method to progressively trim a Seq object sequence from the right end until
37 # a run of min_len residues with quality scores above min_qual is encountered.
38 def quality_trim_right(min_qual, min_len = 1)
39 check_trim_args(min_qual)
41 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
46 # Method to progressively trim a Seq object sequence from the right end until
47 # a run of min_len residues with quality scores above min_qual is encountered.
48 def quality_trim_right!(min_qual, min_len = 1)
49 check_trim_args(min_qual)
51 pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
56 # Method to progressively trim a Seq object sequence from the left end until
57 # a run of min_len residues with quality scores above min_qual is encountered.
58 def quality_trim_left(min_qual, min_len = 1)
59 check_trim_args(min_qual)
61 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
66 # Method to progressively trim a Seq object sequence from the left end until
67 # a run of min_len residues with quality scores above min_qual is encountered.
68 def quality_trim_left!(min_qual, min_len = 1)
69 check_trim_args(min_qual)
71 pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
76 # Method to progressively trim a Seq object sequence from both ends until a
77 # run of min_len residues with quality scores above min_qual is encountered.
78 def quality_trim(min_qual, min_len = 1)
79 check_trim_args(min_qual)
81 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
82 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
84 pos_left = pos_right if pos_left > pos_right
86 self.subseq(pos_left, pos_right - pos_left)
89 # Method to progressively trim a Seq object sequence from both ends until a
90 # run of min_len residues with quality scores above min_qual is encountered.
91 def quality_trim!(min_qual, min_len = 1)
92 check_trim_args(min_qual)
94 pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
95 pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE)
97 pos_left = pos_right if pos_left > pos_right
99 self.subseq!(pos_left, pos_right - pos_left)
102 # Method to locate a pattern in a sequence and trim all sequence to the left
103 # including the pattern.
104 def patmatch_trim_left!(pattern, options = {})
105 match = self.patmatch(pattern, options)
110 self.seq = self.seq[match.pos + match.length .. stop]
111 self.qual = self.qual[match.pos + match.length .. stop] if self.qual
119 # Method to locate a pattern in a sequence and trim all sequence to the right
120 # including the pattern.
121 def patmatch_trim_right!(pattern, options = {})
122 match = self.patmatch(pattern, options)
125 self.seq = self.seq[0 ... match.pos]
126 self.qual = self.qual[0 ... match.pos] if self.qual
136 # Method to check the arguments for trimming and raise on bad sequence, qualities,
138 def check_trim_args(min_qual)
139 raise TrimError, "no sequence" if self.seq.nil?
140 raise TrimError, "no quality score" if self.qual.nil?
141 unless (Seq::SCORE_MIN .. Seq::SCORE_MAX).include? min_qual
142 raise TrimError, "minimum quality value: #{min_qual} out of range #{Seq::SCORE_MIN} .. #{Seq::SCORE_MAX}"
146 # Inline C functions for speed below.
148 # Method for locating the right trim position and return this.
150 VALUE trim_right_pos_c(
151 VALUE _qual, // quality score string
152 VALUE _len, // length of quality score string
153 VALUE _min_qual, // minimum quality score
154 VALUE _min_len, // minimum quality length
155 VALUE _score_base // score base
158 unsigned char *qual = StringValuePtr(_qual);
159 unsigned int len = FIX2UINT(_len);
160 unsigned int min_qual = FIX2UINT(_min_qual);
161 unsigned int min_len = FIX2UINT(_min_len);
162 unsigned int score_base = FIX2UINT(_score_base);
171 while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base >= min_qual))
175 return UINT2NUM(len - i);
186 # Method for locating the left trim position and return this.
188 VALUE trim_left_pos_c(
189 VALUE _qual, // quality score string
190 VALUE _len, // length of quality score string
191 VALUE _min_qual, // minimum quality score
192 VALUE _min_len, // minimum quality length
193 VALUE _score_base // score base
196 unsigned char *qual = StringValuePtr(_qual);
197 unsigned int len = FIX2UINT(_len);
198 unsigned int min_qual = FIX2UINT(_min_qual);
199 unsigned int min_len = FIX2UINT(_min_len);
200 unsigned int score_base = FIX2UINT(_score_base);
209 while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base >= min_qual))