X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq.rb;h=e72250d596cc5d1dd55adac34d3a15b3503048fa;hb=124ad80bd42309d1e6ea7d10dcffb86938c29069;hp=913f4d979928f96472558192a48da3e434a15f7c;hpb=fca95d66e32c26175555a254142375fffc9e0056;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index 913f4d9..e72250d 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -27,6 +27,9 @@ require 'maasha/seq/digest' require 'maasha/seq/trim' require 'narray' +autoload :BackTrack, 'maasha/seq/backtrack.rb' +autoload :Dynamic, 'maasha/seq/dynamic.rb' + # Residue alphabets DNA = %w[a t c g] RNA = %w[a u c g] @@ -64,15 +67,16 @@ TRANS_TAB11 = { "GTG" => "V", "GCG" => "A", "GAG" => "E", "GGG" => "G" } -# Quality scores bases -SCORE_BASE = 64 -SCORE_MIN = 0 -SCORE_MAX = 40 # Error class for all exceptions to do with Seq. class SeqError < StandardError; end class Seq + # Quality scores bases + SCORE_BASE = 64 + SCORE_MIN = 0 + SCORE_MAX = 40 + include Digest include Trim @@ -386,7 +390,12 @@ class Seq seq_new end - # Method to shuffle a sequence readomly inline. + # Method to return a new Seq object with shuffled sequence. + def shuffle + Seq.new(self.seq_name, self.seq.split('').shuffle!.join, self.type, self.qual) + end + + # Method to shuffle a sequence randomly inline. def shuffle! self.seq = self.seq.split('').shuffle!.join self @@ -535,13 +544,30 @@ class Seq # Method that determines if a quality score string can be # absolutely identified as base 33. def qual_base33? - self.qual.match(/[!-:]/) + self.qual.match(/[!-:]/) ? true : false end # Method that determines if a quality score string can be # absolutely identified as base 64. def qual_base64? - self.qual.match(/[K-h]/) + self.qual.match(/[K-h]/) ? true : false + end + + # Method to determine if a quality score is valid. + def qual_valid?(encoding) + raise SeqError, "Missing qual" if self.qual.nil? + + case encoding.downcase + when "sanger" then return true if self.qual.match(/^[!-~]*$/) + when "454" then return true if self.qual.match(/^[@-~]*$/) + when "solexa" then return true if self.qual.match(/^[;-~]*$/) + when "illumina13" then return true if self.qual.match(/^[@-~]*$/) + when "illumina15" then return true if self.qual.match(/^[@-~]*$/) + when "illumina18" then return true if self.qual.match(/^[!-~]*$/) + else raise SeqError, "unknown quality score encoding: #{encoding}" + end + + false end # Method to convert quality scores inbetween formats. @@ -549,7 +575,7 @@ class Seq # 454 base 64, range 0-40 # Solexa base 64, range -5-40 # Illumina13 base 64, range 0-40 - # Illumina15 base 64, range 3-40 + # Illumina15 base 64, range 0-40 # Illumina18 base 33, range 0-41 def convert_scores!(from, to) unless from == to @@ -572,7 +598,7 @@ class Seq when "illumina13" then na_qual += 64 when "illumina15" then na_qual += 64 when "illumina18" then na_qual += 33 - else raise SeqError, "unknown quality score encoding: #{from}" + else raise SeqError, "unknown quality score encoding: #{to}" end self.qual = na_qual.to_s