X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fseq.rb;h=e4b6bfd7c27d5933d0bd9c6b65210bd9a7b32ea0;hb=b2ea0b5a51a558478af60e9df4c643dd58552086;hp=d8a73d31ba8d660fa08bb6aac9c0c24f91137151;hpb=351549b36ba20cf06daf9cf0e14273d84ae1f83e;p=biopieces.git diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index d8a73d3..e4b6bfd 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -27,8 +27,10 @@ require 'maasha/seq/digest' require 'maasha/seq/trim' require 'narray' -autoload :BackTrack, 'maasha/seq/backtrack.rb' -autoload :Dynamic, 'maasha/seq/dynamic.rb' +autoload :BackTrack, 'maasha/seq/backtrack' +autoload :Dynamic, 'maasha/seq/dynamic' +autoload :Homopolymer, 'maasha/seq/homopolymer' +autoload :Levenshtein, 'maasha/seq/levenshtein' # Residue alphabets DNA = %w[a t c g] @@ -67,7 +69,6 @@ TRANS_TAB11 = { "GTG" => "V", "GCG" => "A", "GAG" => "E", "GGG" => "G" } - # Error class for all exceptions to do with Seq. class SeqError < StandardError; end @@ -383,8 +384,14 @@ class Seq # Method to determine the Hamming Distance between # two Sequence objects (case insensitive). - def hamming_distance(seq) - self.seq.upcase.hamming_distance(seq.seq.upcase) + def hamming_distance(entry) + self.seq.upcase.hamming_distance(entry.seq.upcase) + end + + # Method to determine the Edit Distance between + # two Sequence objects (case insensitive). + def edit_distance(entry) + Levenshtein.distance(self.seq, entry.seq) end # Method that generates a random sequence of a given length and type. @@ -515,23 +522,6 @@ class Seq comp end - # Method that returns the length of the longest homopolymeric stretch - # found in a sequence. - def homopol_max(min = 1) - return 0 if self.seq.nil? or self.seq.empty? - - found = false - - self.seq.upcase.scan(/A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|N{#{min},}/) do |match| - found = true - min = match.size > min ? match.size : min - end - - return 0 unless found - - min - end - # Method that returns the percentage of hard masked residues # or N's in a sequence. def hard_mask