X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq%2Ftrim.rb;h=2ca94542b65c3ed3e6d536b460095295b1f13181;hb=3656f15bb59e2eb7eff628bae117db6479b2f03f;hp=ad8c6930e507729d41336b99adc1e16b7148f1f9;hpb=37a8910b7465c8e6aeb2c8211f083f0a40ea2380;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq/trim.rb b/code_ruby/lib/maasha/seq/trim.rb index ad8c693..2ca9454 100644 --- a/code_ruby/lib/maasha/seq/trim.rb +++ b/code_ruby/lib/maasha/seq/trim.rb @@ -1,3 +1,4 @@ +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -22,6 +23,9 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'inline' +require 'maasha/seq/backtrack' + +include BackTrack # Error class for all exceptions to do with Trim. class TrimError < StandardError; end @@ -34,9 +38,9 @@ module Trim def quality_trim_right(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) - self.subseq(0, pos) + self[0, pos] end # Method to progressively trim a Seq object sequence from the right end until @@ -44,9 +48,12 @@ module Trim def quality_trim_right!(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + + self.seq = self.seq[0, pos] + self.qual = self.qual[0, pos] if self.qual - self.subseq!(0, pos) + self end # Method to progressively trim a Seq object sequence from the left end until @@ -54,9 +61,9 @@ module Trim def quality_trim_left(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) - self.subseq(pos) + self[pos .. -1] end # Method to progressively trim a Seq object sequence from the left end until @@ -64,9 +71,12 @@ module Trim def quality_trim_left!(min_qual, min_len = 1) check_trim_args(min_qual) - pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + + self.seq = self.seq[pos .. -1] + self.qual = self.qual[pos .. -1] if self.qual - self.subseq!(pos) + self end # Method to progressively trim a Seq object sequence from both ends until a @@ -74,12 +84,12 @@ module Trim def quality_trim(min_qual, min_len = 1) check_trim_args(min_qual) - pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) - pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) pos_left = pos_right if pos_left > pos_right - self.subseq(pos_left, pos_right - pos_left) + self[pos_left ... pos_right] end # Method to progressively trim a Seq object sequence from both ends until a @@ -87,12 +97,47 @@ module Trim def quality_trim!(min_qual, min_len = 1) check_trim_args(min_qual) - pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) - pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, SCORE_BASE) + pos_right = trim_right_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) + pos_left = trim_left_pos_c(self.qual, self.length, min_qual, min_len, Seq::SCORE_BASE) pos_left = pos_right if pos_left > pos_right - self.subseq!(pos_left, pos_right - pos_left) + self.seq = self.seq[pos_left ... pos_right] + self.qual = self.qual[pos_left ... pos_right] if self.qual + + self + end + + # Method to locate a pattern in a sequence and trim all sequence to the left + # including the pattern. + def patmatch_trim_left!(pattern, options = {}) + match = self.patmatch(pattern, options) + + if match + stop = self.length + + self.seq = self.seq[match.pos + match.length .. stop] + self.qual = self.qual[match.pos + match.length .. stop] if self.qual + + return self + end + + nil + end + + # Method to locate a pattern in a sequence and trim all sequence to the right + # including the pattern. + def patmatch_trim_right!(pattern, options = {}) + match = self.patmatch(pattern, options) + + if match + self.seq = self.seq[0 ... match.pos] + self.qual = self.qual[0 ... match.pos] if self.qual + + return self + end + + nil end private @@ -102,8 +147,8 @@ module Trim def check_trim_args(min_qual) raise TrimError, "no sequence" if self.seq.nil? raise TrimError, "no quality score" if self.qual.nil? - unless (SCORE_MIN .. SCORE_MAX).include? min_qual - raise TrimError, "minimum quality value: #{min_qual} out of range #{SCORE_MIN} .. #{SCORE_MAX}" + unless (Seq::SCORE_MIN .. Seq::SCORE_MAX).include? min_qual + raise TrimError, "minimum quality value: #{min_qual} out of range #{Seq::SCORE_MIN} .. #{Seq::SCORE_MAX}" end end @@ -132,7 +177,7 @@ module Trim { c = 0; - while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base > min_qual)) + while ((c < min_len) && ((c + i) < len) && (qual[len - (c + i) - 1] - score_base >= min_qual)) c++; if (c == min_len) @@ -170,7 +215,7 @@ module Trim { c = 0; - while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base > min_qual)) + while ((c < min_len) && ((c + i) < len) && (qual[c + i] - score_base >= min_qual)) c++; if (c == min_len)