From: martinahansen Date: Thu, 28 Feb 2013 12:36:22 +0000 (+0000) Subject: refactoring of revcomp in seq.rb X-Git-Url: https://git.donarmstrong.com/?p=biopieces.git;a=commitdiff_plain;h=b982cc677363d7458963b610ec53bf2c4f7476a9 refactoring of revcomp in seq.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@2106 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/find_adaptor b/bp_bin/find_adaptor index 4fd559d..ac3681a 100755 --- a/bp_bin/find_adaptor +++ b/bp_bin/find_adaptor @@ -53,11 +53,11 @@ casts << {:long=>'deletions', :short=>'d', :type=>'uint', :mandatory=>false, options = Biopieces.options_parse(ARGV, casts) if options[:forward_rc] - options[:forward] = Seq.new("test", options[:forward_rc], 'dna').revcomp.seq + options[:forward] = Seq.new("test", options[:forward_rc], 'dna').reverse.complement.seq end if options[:reverse_rc] - options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').revcomp.seq + options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').reverse.complement.seq end raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse] diff --git a/bp_bin/pcr_seq b/bp_bin/pcr_seq index 5cee30b..57eb447 100755 --- a/bp_bin/pcr_seq +++ b/bp_bin/pcr_seq @@ -146,7 +146,7 @@ class Pattern seq = Seq.new seq.seq = primer seq.type = 'dna' - seq.revcomp + seq.reverse!.complement! descriptor ? seq.seq + descriptor : seq.seq end @@ -164,11 +164,11 @@ tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") if options[:forward_rc] - options[:forward] = Seq.new("test", options[:forward_rc], 'dna').revcomp.seq + options[:forward] = Seq.new("test", options[:forward_rc], 'dna').reverse.complement.seq end if options[:reverse_rc] - options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').revcomp.seq + options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').reverse.complement.seq end raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse] diff --git a/bp_bin/shred_seq b/bp_bin/shred_seq index 5b2a886..465f088 100755 --- a/bp_bin/shred_seq +++ b/bp_bin/shred_seq @@ -51,7 +51,7 @@ class Seq while coverage(sum) < max_cov entry = self.subseq_rand(size) - entry.revcomp if strand == '-' + entry.reverse!.complement! if strand == '-' if block_given? yield entry diff --git a/code_ruby/lib/maasha/locator.rb b/code_ruby/lib/maasha/locator.rb index 59bcdb1..5996663 100644 --- a/code_ruby/lib/maasha/locator.rb +++ b/code_ruby/lib/maasha/locator.rb @@ -110,7 +110,7 @@ class Locator newseq = Seq.new(nil, @seq.seq[int_beg..int_end], "dna") unless newseq.seq.nil? - newseq.revcomp if comp + newseq.reverse!.complement! if comp @subseq.seq << (order ? " " + newseq.seq : newseq.seq) end @@ -120,7 +120,7 @@ class Locator newseq = Seq.new(nil, @seq.seq[pos], "dna") unless newseq.seq.nil? - newseq.revcomp if comp + newseq.reverse!.complement! if comp @subseq.seq << (order ? " " + newseq.seq : newseq.seq) end diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index e72250d..4a0af47 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -150,6 +150,7 @@ class Seq # by inspecting the first 100 residues. def type_guess! self.type = self.type_guess + self end # Returns the length of a sequence. @@ -334,17 +335,13 @@ class Seq key end - # Method to reverse complement sequence. - def reverse_complement - self.reverse - self.complement - self + # Method to reverse the sequence. + def reverse + Seq.new(self.seq_name, self.seq.reverse, self.type, self.qual ? self.qual.reverse : self.qual) end - alias :revcomp :reverse_complement - # Method to reverse the sequence. - def reverse + def reverse! self.seq.reverse! self.qual.reverse! if self.qual self @@ -354,6 +351,26 @@ class Seq def complement raise SeqError, "Cannot complement 0 length sequence" if self.length == 0 + entry = Seq.new + entry.seq_name = self.seq_name + entry.type = self.type + entry.qual = self.qual + + if self.is_dna? + entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn') + elsif self.is_rna? + entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn') + else + raise SeqError, "Cannot complement sequence type: #{self.type}" + end + + entry + end + + # Method that complements sequence including ambiguity codes. + def complement! + raise SeqError, "Cannot complement 0 length sequence" if self.length == 0 + if self.is_dna? self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn') elsif self.is_rna? @@ -361,6 +378,8 @@ class Seq else raise SeqError, "Cannot complement sequence type: #{self.type}" end + + self end # Method to determine the Hamming Distance between @@ -488,21 +507,6 @@ class Seq ((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2) end - # Hard masks sequence residues where the corresponding quality score - # is below a given cutoff. - def mask_seq_hard_old(cutoff) - seq = self.seq.upcase - scores = self.qual - i = 0 - - scores.each_char do |score| - seq[i] = 'N' if score.ord - SCORE_BASE < cutoff - i += 1 - end - - self.seq = seq - end - # Hard masks sequence residues where the corresponding quality score # is below a given cutoff. def mask_seq_hard!(cutoff) diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index f966a5f..8bc6852 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -219,7 +219,15 @@ class TestSeq < Test::Unit::TestCase def test_Seq_reverse_returns_correctly @entry.seq = "ATCG" - assert_equal("GCTA", @entry.reverse.seq) + new_entry = @entry.reverse + assert_equal("GCTA", new_entry.seq) + assert_equal("ATCG", @entry.seq) + end + + def test_Seq_reverse_bang_returns_correctly + @entry.seq = "ATCG" + @entry.reverse! + assert_equal("GCTA", @entry.seq) end def test_Seq_complement_raises_if_no_sequence @@ -236,27 +244,43 @@ class TestSeq < Test::Unit::TestCase def test_Seq_complement_for_DNA_is_correct @entry.seq = 'ATCGatcg' @entry.type = 'dna' - assert_equal("TAGCtagc", @entry.complement) + comp = @entry.complement + assert_equal("TAGCtagc", comp.seq) + assert_equal("ATCGatcg", @entry.seq) end def test_Seq_complement_for_RNA_is_correct @entry.seq = 'AUCGaucg' @entry.type = 'rna' - assert_equal("UAGCuagc", @entry.complement) + comp = @entry.complement + assert_equal("UAGCuagc", comp.seq) + assert_equal("AUCGaucg", @entry.seq) + end + + def test_Seq_complement_bang_raises_if_no_sequence + @entry.type = 'dna' + assert_raise(SeqError) { @entry.complement! } + end + + def test_Seq_complement_bang_raises_on_bad_type + @entry.seq = 'ATCG' + @entry.type = 'protein' + assert_raise(SeqError) { @entry.complement! } end - def test_Seq_reverse_complement_for_DNA_is_correct + def test_Seq_complement_bang_for_DNA_is_correct @entry.seq = 'ATCGatcg' @entry.type = 'dna' - assert_equal("cgatCGAT", @entry.reverse_complement.seq) + assert_equal("TAGCtagc", @entry.complement!.seq) end - def test_Seq_reverse_complement_for_RNA_is_correct + def test_Seq_complement_bang_for_RNA_is_correct @entry.seq = 'AUCGaucg' @entry.type = 'rna' - assert_equal("cgauCGAU", @entry.reverse_complement.seq) + assert_equal("UAGCuagc", @entry.complement!.seq) end + def test_Seq_hamming_distance_returns_correctly seq1 = Seq.new("test1", "ATCG") seq2 = Seq.new("test2", "atgg")