X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq%2Ftrim.rb;h=2ca94542b65c3ed3e6d536b460095295b1f13181;hb=3656f15bb59e2eb7eff628bae117db6479b2f03f;hp=a322dc943c313c09840973e53c1f524027bb337d;hpb=e643e1f687105eea667af1e3d9281842319099a5;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq/trim.rb b/code_ruby/lib/maasha/seq/trim.rb index a322dc9..2ca9454 100644 --- a/code_ruby/lib/maasha/seq/trim.rb +++ b/code_ruby/lib/maasha/seq/trim.rb @@ -1,3 +1,4 @@ +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -22,7 +23,11 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'inline' +require 'maasha/seq/backtrack' +include BackTrack + +# Error class for all exceptions to do with Trim. class TrimError < StandardError; end # Module containing methods for end trimming sequences with suboptimal quality @@ -33,9 +38,9 @@ module Trim def quality_trim_right(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) - self.subseq(0, pos) + self[0, pos] end # Method to progressively trim a Seq object sequence from the right end until @@ -43,9 +48,12 @@ module Trim def quality_trim_right!(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + + self.seq = self.seq[0, pos] + self.qual = self.qual[0, pos] if self.qual - self.subseq!(0, pos) + self end # Method to progressively trim a Seq object sequence from the left end until @@ -53,9 +61,9 @@ module Trim def quality_trim_left(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) - self.subseq(pos) + self[pos .. -1] end # Method to progressively trim a Seq object sequence from the left end until @@ -63,9 +71,12 @@ module Trim def quality_trim_left!(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + + self.seq = self.seq[pos .. -1] + self.qual = self.qual[pos .. -1] if self.qual - self.subseq!(pos) + self end # Method to progressively trim a Seq object sequence from both ends until a @@ -73,12 +84,12 @@ module Trim def quality_trim(min_qual, min_len = 1) check_trim_args(min_qual) - pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) - pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) pos_left = pos_right if pos_left > pos_right - self.subseq(pos_left, pos_right - pos_left) + self[pos_left ... pos_right] end # Method to progressively trim a Seq object sequence from both ends until a @@ -86,12 +97,47 @@ module Trim def quality_trim!(min_qual, min_len = 1) check_trim_args(min_qual) - pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) - pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) pos_left = pos_right if pos_left > pos_right - self.subseq!(pos_left, pos_right - pos_left) + self.seq = self.seq[pos_left ... pos_right] + self.qual = self.qual[pos_left ... pos_right] if self.qual + + self + end + + # Method to locate a pattern in a sequence and trim all sequence to the left + # including the pattern. + def patmatch_trim_left!(pattern, options = {}) + match = self.patmatch(pattern, options) + + if match + stop = self.length + + self.seq = self.seq[match.pos + match.length .. stop] + self.qual = self.qual[match.pos + match.length .. stop] if self.qual + + return self + end + + nil + end + + # Method to locate a pattern in a sequence and trim all sequence to the right + # including the pattern. + def patmatch_trim_right!(pattern, options = {}) + match = self.patmatch(pattern, options) + + if match + self.seq = self.seq[0 ... match.pos] + self.qual = self.qual[0 ... match.pos] if self.qual + + return self + end + + nil end private @@ -101,8 +147,8 @@ module Trim def check_trim_args(min_qual) raise TrimError, "no sequence" if self.seq.nil? raise TrimError, "no quality score" if self.qual.nil? - unless (SCORE_MIN .. SCORE_MAX).include? min_qual - raise TrimError, "minimum quality value: #{min_qual} out of range #{SCORE_MIN} .. #{SCORE_MAX}" + unless (Seq::SCORE_MIN .. Seq::SCORE_MAX).include? min_qual + raise TrimError, "minimum quality value: #{min_qual} out of range #{Seq::SCORE_MIN} .. #{Seq::SCORE_MAX}" end end @@ -131,7 +177,7 @@ module Trim { c = 0; - while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base > min_qual)) + while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base >= min_qual)) c++; if (c == min_len) @@ -169,7 +215,7 @@ module Trim { c = 0; - while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base > min_qual)) + while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base >= min_qual)) c++; if (c == min_len)