# Adaptor location and clipping methods of Seq
# (code_ruby/Maasha/lib/seq.rb).
class Seq
  # Method that locates an adaptor or part thereof in the sequence
  # of a Seq object beginning from the right. The comparison is case
  # insensitive. Returns the leftmost position in the sequence from
  # which the remainder of the sequence equals the beginning of the
  # adaptor, or -1 if the adaptor was not found.
  def adaptor_locate_right(adaptor)
    residues = seq.upcase
    target   = adaptor.upcase

    # Never start left of the sequence: an adaptor longer than the
    # sequence can only overlap with its leading part, and a negative
    # start index would make the slice below return nil.
    first = [residues.length - target.length, 0].max

    (first...residues.length).each do |pos|
      return pos if target.start_with?(residues[pos..-1])
    end

    -1
  end

  # Method that locates an adaptor or part thereof in the sequence
  # of a Seq object beginning from the left. The comparison is case
  # insensitive. Returns the position of the last residue in the
  # sequence that overlaps with the end of the adaptor (longest
  # overlap first), or -1 if the adaptor was not found.
  def adaptor_locate_left(adaptor)
    residues = seq.upcase
    target   = adaptor.upcase

    # An overlap can be no longer than the shorter of the two strings.
    longest = [residues.length, target.length].min

    longest.downto(1) do |len|
      return len - 1 if target.end_with?(residues[0...len])
    end

    -1
  end

  # Method that locates an adaptor or part thereof in the sequence
  # of a Seq object beginning from the right and removes the adaptor
  # sequence if found. Returns the clipped sequence, or nil if the
  # adaptor was not found and the sequence was left unchanged.
  #
  # NOTE(review): only seq is trimmed here; if the object also carries
  # per-residue quality scores they are left untouched - confirm
  # whether callers expect those to be clipped as well.
  def adaptor_clip_right(adaptor)
    pos = adaptor_locate_right(adaptor)

    # -1 is the only "not found" value; position 0 is a valid hit
    # meaning the whole sequence is adaptor.
    return if pos < 0

    self.seq = seq[0...pos]
  end

  # Method that locates an adaptor or part thereof in the sequence
  # of a Seq object beginning from the left and removes the adaptor
  # sequence if found. Returns the clipped sequence, or nil if the
  # adaptor was not found and the sequence was left unchanged.
  #
  # NOTE(review): only seq is trimmed here; if the object also carries
  # per-residue quality scores they are left untouched - confirm
  # whether callers expect those to be clipped as well.
  def adaptor_clip_left(adaptor)
    pos = adaptor_locate_left(adaptor)

    # -1 is the only "not found" value; position 0 is a valid hit
    # meaning a single residue overlaps with the adaptor.
    return if pos < 0

    self.seq = seq[(pos + 1)..-1]
  end
end
# Tests for the adaptor location and clipping methods of Seq
# (code_ruby/Maasha/test/test_seq.rb). The @entry fixture is expected
# to be provided by the test case's setup, which is not shown here.
class TestSeq < Test::Unit::TestCase
  # Each adaptor overlapping the right end is reported at the position
  # where the overlap starts; a miss is reported as -1.
  def test_Seq_adaptor_locate_right_returns_correctly
    @entry.seq = "nnnnncgat"

    [
      ["X",         -1],
      ["TX",         8],
      ["ATX",        7],
      ["GATX",       6],
      ["CGATX",      5],
      ["NNNNNCGAT",  0]
    ].each do |adaptor, expected|
      assert_equal(expected, @entry.adaptor_locate_right(adaptor))
    end
  end

  # Each adaptor overlapping the left end is reported at the position
  # of the last overlapping residue; a miss is reported as -1.
  def test_Seq_adaptor_locate_left_returns_correctly
    @entry.seq = "cgatnnnnn"

    [
      ["X",         -1],
      ["XC",         0],
      ["XCG",        1],
      ["XCGA",       2],
      ["XCGAT",      3],
      ["CGATNNNNN",  8]
    ].each do |adaptor, expected|
      assert_equal(expected, @entry.adaptor_locate_left(adaptor))
    end
  end

  # Clipping from the right removes the trailing adaptor.
  def test_Seq_adaptor_clip_right_returns_correct_sequence
    @entry.seq = "nnnnncgat"

    @entry.adaptor_clip_right("cgat")

    assert_equal("nnnnn", @entry.seq)
  end

  # Clipping from the left removes the leading adaptor.
  def test_Seq_adaptor_clip_left_returns_correct_sequence
    @entry.seq = "cgatnnnnn"

    @entry.adaptor_clip_left("cgat")

    assert_equal("nnnnn", @entry.seq)
  end
end