end
# Method that generates a random sequence of a given length and type.
- def generate(length,type)
+ def generate(length, type)
raise SeqError, "Cannot generate sequence length < 1: #{length}" if length <= 0
case type.downcase
((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2)
end
+ # Method that locates an adaptor or part thereof in the sequence
+ # of a Seq object beginning from the right. Returns the location
+ # in the sequence that overlaps with the adaptor or -1 if the
+ # adaptor was not found.
+ def adaptor_locate_right(adaptor)
+ pos = self.seq.length - adaptor.length
+
+ while pos < self.seq.length
+ len = self.seq.length - pos
+ subseq = self.seq[pos ... pos + len].upcase
+ subadaptor = adaptor[0 ... len].upcase
+
+ return pos if subseq == subadaptor
+ pos += 1
+ end
+
+ -1
+ end
+
+ # Method that locates an adaptor or part thereof in the sequence
+ # of a Seq object beginning from the left. Returns the location
+ # in the sequence that overlaps with the adaptor or -1 if the
+ # adaptor was not found.
+ def adaptor_locate_left(adaptor)
+ pos = adaptor.length
+
+ while pos > 0
+ len = pos
+ subseq = self.seq[0 ... len].upcase
+ subadaptor = adaptor[adaptor.length - len ... adaptor.length].upcase
+
+ pos -= 1
+
+ return pos if subseq == subadaptor
+ end
+
+ -1
+ end
+
+ # Method that locates an adaptor or part thereof in the sequence
+ # of a Seq object beginning from the right and removes the adaptor
+ # sequence if found.
+ def adaptor_clip_right(adaptor)
+ pos = self.adaptor_locate_right(adaptor)
+
+ if pos > 0
+ self.seq = self.seq[0 ... pos]
+ end
+ end
+
+ # Method that locates an adaptor or part thereof in the sequence
+ # of a Seq object beginning from the left and removes the adaptor
+ # sequence if found.
+ def adaptor_clip_left(adaptor)
+ pos = self.adaptor_locate_left(adaptor)
+
+ if pos > 0
+ self.seq = self.seq[pos + 1 ... self.seq.length]
+ end
+ end
+
# Method to convert the quality scores from a specified base
# to another base.
def convert_phred2illumina!
__END__
-
-
-
# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
class Seq < String
# Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
end
end
- # Method to wrap a sequence to a given width using a given delimiter.
- def wrap(width = 80, delimit = $/)
- raise ArgumentError, "Cannot wrap sequence to negative width: #{width}." if width <= 0
-
- self.delete!(" \t\n\r")
- self.gsub(/.{#{width}}(?!$)/, "\\0#{delimit}")
- end
-
- # Method to wrap and replace a sequence to a given width using a given delimiter.
- def wrap!(width = 80, delimit = $/)
- self.replace(self.wrap(width, delimit))
- end
-
- # Method that replaces sequence with a randomly generated sequence of a given length.
- def generate!(length)
- self.replace(self.generate(length))
- end
-
# Class containing methods specific for amino acid (AA) sequences.
class AA < Seq
# Method that returns an array of amino acid residues.
end
def test_Seq_generate_with_ok_type_dont_raise
- %w[ dna DNA rna RNA protein Protein ].each do |type|
+ %w[dna DNA rna RNA protein Protein].each do |type|
assert_nothing_raised { @entry.generate(10, type) }
end
end
assert_equal(25.00, @entry.soft_mask)
end
+ def test_Seq_adaptor_locate_right_returns_correctly
+ @entry.seq = "nnnnncgat"
+ assert_equal(-1, @entry.adaptor_locate_right("X"))
+ assert_equal(8, @entry.adaptor_locate_right("TX"))
+ assert_equal(7, @entry.adaptor_locate_right("ATX"))
+ assert_equal(6, @entry.adaptor_locate_right("GATX"))
+ assert_equal(5, @entry.adaptor_locate_right("CGATX"))
+ assert_equal(0, @entry.adaptor_locate_right("NNNNNCGAT"))
+ end
+
+ def test_Seq_adaptor_locate_left_returns_correctly
+ @entry.seq = "cgatnnnnn"
+ assert_equal(-1, @entry.adaptor_locate_left("X"))
+ assert_equal(0, @entry.adaptor_locate_left("XC"))
+ assert_equal(1, @entry.adaptor_locate_left("XCG"))
+ assert_equal(2, @entry.adaptor_locate_left("XCGA"))
+ assert_equal(3, @entry.adaptor_locate_left("XCGAT"))
+ assert_equal(8, @entry.adaptor_locate_left("CGATNNNNN"))
+ end
+
+ def test_Seq_adaptor_clip_right_returns_correct_sequence
+ @entry.seq = "nnnnncgat"
+ @entry.adaptor_clip_right("cgat")
+ assert_equal( "nnnnn", @entry.seq)
+ end
+
+ def test_Seq_adaptor_clip_left_returns_correct_sequence
+ @entry.seq = "cgatnnnnn"
+ @entry.adaptor_clip_left("cgat")
+ assert_equal( "nnnnn", @entry.seq)
+ end
+
def test_Digest_new_raises_on_bad_pattern_residue
assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
end
__END__
-
-class TestSeq < Test::Unit::TestCase
- # Testing Seq#guess_type
-
- def test_guess_type_raise_if_no_sequence
- s = Seq.new
-
- assert_raise( ArgumentError ) { s.guess_type }
- end
-
- def test_guess_type_AA_uppercase
- s1 = Seq.new( "SEQ" )
- s2 = Seq::AA.new( "SEQ" )
- assert_equal( s1.guess_type.class, s2.class )
- end
-
- def test_guess_type_AA_lowercase
- s1 = Seq.new( "seq" )
- s2 = Seq::AA.new( "seq" )
-
- assert_equal( s1.guess_type.class, s2.class )
- end
-
- def test_guess_type_DNA_uppercase
- s1 = Seq.new( "ATCG" )
- s2 = Seq::NA::DNA.new( "ATCG" )
-
- assert_equal( s1.guess_type.class, s2.class )
- end
-
- def test_guess_type_DNA_lowercase
- s1 = Seq.new( "atcg" )
- s2 = Seq::NA::DNA.new( "atcg" )
-
- assert_equal( s1.guess_type.class, s2.class )
- end
-
- def test_guess_type_RNA_uppercase
- s1 = Seq.new( "AUCG" )
- s2 = Seq::NA::RNA.new( "AUCG" )
-
- assert_equal( s1.guess_type.class, s2.class )
- end
-
- def test_guess_type_RNA_lowercase
- s1 = Seq.new( "aucg" )
- s2 = Seq::NA::RNA.new( "aucg" )
-
- assert_equal( s1.guess_type.class, s2.class )
- end
-
- # Testing Seq#wrap
-
- def test_wrap_arg_is_a_positive_number
- s = Seq.new
-
- assert_raise( ArgumentError ) { s.wrap( 0 ) }
- assert_raise( ArgumentError ) { s.wrap( -10 ) }
- end
-
- def test_wrap_with_0_args
- s = Seq.new( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACGACTACGACT" )
-
- assert_equal( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACG\nACTACGACT", s.wrap.to_s )
- end
-
- def test_wrap_with_1_args
- s = Seq.new( "ATCG" )
-
- assert_equal( "AT\nCG", s.wrap( 2 ).to_s )
- end
-
- def test_wrap_with_2_args
- s = Seq.new( "ATCG" )
-
- assert_equal( "AT\rCG", s.wrap( 2, "\r" ).to_s )
- end
-
- def test_wrap_dont_change_instance_var
- s = Seq.new( "ATCG" )
-
- s.wrap( 2 )
-
- assert_equal( "ATCG", s.to_s )
- end
-
- # Testing Seq#wrap!
-
- def test_wrap_with_0_args!
- s = Seq.new( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACGACTACGACT" )
-
- s.wrap!
-
- assert_equal( "ACTGACTAGCATCGACTACGACTGACACGACGACGACGACCGAACGATCGATCGCAGACGACGCAGCATGACGACGTACG\nACTACGACT", s.to_s )
- end
-
- def test_wrap_with_1_args!
- s = Seq.new( "ATCG" )
-
- s.wrap!( 2 )
-
- assert_equal( "AT\nCG", s.to_s )
- end
-
- def test_wrap_with_2_args!
- s = Seq.new( "ATCG" )
-
- s.wrap!( 2, "\r" )
-
- assert_equal( "AT\rCG", s.to_s )
- end
-
- # Testing Seq#generate
-
- def test_generate_arg_is_a_positive_number
- s = Seq.new
-
- assert_raise( ArgumentError ) { s.generate( 0 ) }
- assert_raise( ArgumentError ) { s.generate( -10 ) }
- end
-
- def test_generate
- s = Seq::AA.new
-
- seq = s.generate( 40 )
-
- assert_equal( 40, seq.length )
- end
-
- def test_generate_dont_change_instance_var
- s = Seq::AA.new
-
- seq = s.generate( 40 )
-
- assert_equal( "", s.to_s )
- end
-
- # Testing Seq#generate!
-
- def test_generate!
- s = Seq::AA.new
-
- s.generate!( 40 )
-
- assert_equal( 40, s.length )
- end
-
- # Testing Seq::AA#residues
-
- def test_Seq_AA_residues
- s = Seq::AA.new
-
- assert_equal( %w{ F L S Y C W P H Q R I M T N K V A D E G }, s.residues )
- end
-
- # Testing Seq::AA#mol_weight
-
- def test_Seq_aa_mol_weight_bad_residue
- s = Seq::AA.new( "7" )
- assert_raise( ArgumentError ) { s.mol_weight }
- end
-
- def test_Seq_aa_mol_wight_return_correct_uppercase
- s = Seq::AA.new( "SEQ" )
- assert_equal( 398.0, s.mol_weight )
- end
-
- def test_Seq_aa_mol_wight_return_correct_lowercase
- s = Seq::AA.new( "seq" )
- assert_equal( 398.0, s.mol_weight )
- end
-
- # Testing Seq::NA::DNA#residues
-
- def test_Seq_NA_DNA_residues
- s = Seq::NA::DNA.new
-
- assert_equal( %w{ A T C G }, s.residues )
- end
-
- # Testing Seq::NA::DNA#complement
-
- def test_Seq_NA_DNA_complement_correct
- s = Seq::NA::DNA.new( "ATCGatcg" )
- assert_equal( "TAGCtagc", s.complement.to_s )
- end
-
- # Testing Seq::NA::DNA#to_RNA
-
- def test_Seq_NA_DNA_to_RNA_returns_RNA_object
- dna = Seq::NA::DNA.new( "ATCGatcg" )
- rna = Seq::NA::RNA.new
-
- new_rna = dna.to_RNA
-
- assert_equal( rna.class, new_rna.class )
- end
-
- def test_Seq_NA_DNA_to_RNA_is_correct
- dna = Seq::NA::DNA.new( "ATCGatcg" )
- rna = dna.to_RNA
-
- assert_equal( "AUCGaucg", rna.to_s )
- end
-
- # Testing Seq::NA::RNA#residues
-
- def test_Seq_NA_RNA_residues
- s = Seq::NA::RNA.new
-
- assert_equal( %w{ A U C G }, s.residues )
- end
-
- # Testing Seq::NA::RNA#complement
-
- def test_Seq_NA_RNA_complement_correct
- s = Seq::NA::RNA.new( "AUCGaucg" )
- assert_equal( "UAGCuagc", s.complement.to_s )
- end
-
- # Testing Seq::NA::RNA#to_DNA
-
- def test_Seq_NA_RNA_to_DNA_returns_DNA_object
- rna = Seq::NA::RNA.new( "AUCGaucg" )
- dna = Seq::NA::DNA.new
-
- new_dna = rna.to_DNA
-
- assert_equal( dna.class, new_dna.class )
- end
-
- def test_Seq_NA_RNA_to_DNA_is_correct
- rna = Seq::NA::RNA.new( "AUCGaucg" )
- dna = rna.to_DNA
-
- assert_equal( "ATCGatcg", dna.to_s )
- end
-end
-