From a945a21356f87f679f433279e502676f425ac8ee Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 8 Mar 2013 10:43:05 +0000 Subject: [PATCH] refactoring of ruby code converting sequences types to symbols git-svn-id: http://biopieces.googlecode.com/svn/trunk@2114 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/blast_seq_pair | 6 +- bp_bin/find_adaptor | 4 +- bp_bin/patscan_seq | 2 +- bp_bin/pcr_seq | 8 +-- bp_bin/shred_seq | 2 +- code_ruby/lib/maasha/align.rb | 2 +- code_ruby/lib/maasha/embl.rb | 2 +- code_ruby/lib/maasha/genbank.rb | 2 +- code_ruby/lib/maasha/locator.rb | 6 +- code_ruby/lib/maasha/seq.rb | 42 +++++++------- code_ruby/lib/maasha/seq/patscan.rb | 6 +- code_ruby/test/maasha/test_locator.rb | 2 +- code_ruby/test/maasha/test_seq.rb | 82 +++++++++++++-------------- 13 files changed, 81 insertions(+), 85 deletions(-) diff --git a/bp_bin/blast_seq_pair b/bp_bin/blast_seq_pair index 1963eaf..d612a6c 100755 --- a/bp_bin/blast_seq_pair +++ b/bp_bin/blast_seq_pair @@ -121,14 +121,14 @@ class Blast def program_choose(type1, type2) program = "" - if type1 == 'protein' - if type2 == 'protein' + if type1 == :protein + if type2 == :protein program = 'blastp' else program = 'tblastn' end else - if type2 == 'protein' + if type2 == :protein program = 'blastx' else program = 'blastn' diff --git a/bp_bin/find_adaptor b/bp_bin/find_adaptor index ac3681a..f2f593d 100755 --- a/bp_bin/find_adaptor +++ b/bp_bin/find_adaptor @@ -53,11 +53,11 @@ casts << {:long=>'deletions', :short=>'d', :type=>'uint', :mandatory=>false, options = Biopieces.options_parse(ARGV, casts) if options[:forward_rc] - options[:forward] = Seq.new("test", options[:forward_rc], 'dna').reverse.complement.seq + options[:forward] = Seq.new("test", options[:forward_rc], :dna).reverse.complement.seq end if options[:reverse_rc] - options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').reverse.complement.seq + options[:reverse] = Seq.new("test", options[:reverse_rc], :dna).reverse.complement.seq end raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse] diff --git a/bp_bin/patscan_seq b/bp_bin/patscan_seq index bc07fca..089954d 100755 --- a/bp_bin/patscan_seq +++ b/bp_bin/patscan_seq @@ -50,7 +50,7 @@ class Patscan args = [] args << "scan_for_matches" args << "-c" if comp - args << "-p" if type == 'protein' + args << "-p" if type == :protein args << "-n #{max_mis}" if max_mis args << "-m #{max_hit}" if max_hit args << @pat_file diff --git a/bp_bin/pcr_seq b/bp_bin/pcr_seq index f6c874e..7e487f8 100755 --- a/bp_bin/pcr_seq +++ b/bp_bin/pcr_seq @@ -145,7 +145,7 @@ class Pattern seq = Seq.new seq.seq = primer - seq.type = 'dna' + seq.type = :dna seq.reverse!.complement! descriptor ? seq.seq + descriptor : seq.seq @@ -157,18 +157,18 @@ casts << {:long=>'forward', :short=>'f', :type=>'string', :mandatory=>false, casts << {:long=>'forward_rc', :short=>'F', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'reverse', :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'reverse_rc', :short=>'R', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'max_dist', :short=>'m', :type=>'uint', :mandatory=>true, :default=>5000, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'max_dist', :short=>'m', :type=>'uint', :mandatory=>true, :default=>5000, :allowed=>nil, :disallowed=>"0"} options = Biopieces.options_parse(ARGV, casts) tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") if options[:forward_rc] - options[:forward] = Seq.new("test", options[:forward_rc], 'dna').reverse.complement.seq + options[:forward] = Seq.new("test", options[:forward_rc], :dna).reverse.complement.seq end if options[:reverse_rc] - options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').reverse.complement.seq + options[:reverse] = Seq.new("test", options[:reverse_rc], :dna).reverse.complement.seq end raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse] diff --git a/bp_bin/shred_seq b/bp_bin/shred_seq index ffd79cb..94d5835 100755 --- a/bp_bin/shred_seq +++ b/bp_bin/shred_seq @@ -85,7 +85,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| if record[:SEQ] and record[:SEQ].length >= options[:size] entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES]) - entry.type = 'dna' + entry.type = :dna entry.shred(options[:size], options[:coverage]) do |subentry| output.puts subentry.to_bp diff --git a/code_ruby/lib/maasha/align.rb b/code_ruby/lib/maasha/align.rb index c139fe8..7beb765 100755 --- a/code_ruby/lib/maasha/align.rb +++ b/code_ruby/lib/maasha/align.rb @@ -251,7 +251,7 @@ class Align new_seq = Seq.new new_seq.seq = consensus_seq new_seq.qual = consensus_qual if @has_qual - new_seq.type = "dna" + new_seq.type = :dna new_seq end diff --git a/code_ruby/lib/maasha/embl.rb b/code_ruby/lib/maasha/embl.rb index d08dce5..29c3eb6 100644 --- a/code_ruby/lib/maasha/embl.rb +++ b/code_ruby/lib/maasha/embl.rb @@ -87,7 +87,7 @@ class EMBL < Filesys seq = @entry[j + 1 .. i].join.delete(" 0123456789") - Seq.new(nil, seq, "dna") if seq + Seq.new(nil, seq, :dna) if seq end # Method to get the base keys from EMBL entry and return these diff --git a/code_ruby/lib/maasha/genbank.rb b/code_ruby/lib/maasha/genbank.rb index c3515ec..1cf2038 100644 --- a/code_ruby/lib/maasha/genbank.rb +++ b/code_ruby/lib/maasha/genbank.rb @@ -87,7 +87,7 @@ class Genbank < Filesys seq = @entry[j + 1 .. i].join.delete(" 0123456789") - Seq.new(nil, seq, "dna") if seq + Seq.new(nil, seq, :dna) if seq end # Method to get the base keys from Genbank entry and return these diff --git a/code_ruby/lib/maasha/locator.rb b/code_ruby/lib/maasha/locator.rb index 5996663..638f473 100644 --- a/code_ruby/lib/maasha/locator.rb +++ b/code_ruby/lib/maasha/locator.rb @@ -35,7 +35,7 @@ class Locator def initialize(locator, seq) @locator = locator @seq = seq - @subseq = Seq.new(nil, "", "dna") + @subseq = Seq.new(nil, "", :dna) parse_locator(locator) end @@ -107,7 +107,7 @@ class Locator int_beg = $1.to_i - 1 int_end = $2.to_i - 1 - newseq = Seq.new(nil, @seq.seq[int_beg..int_end], "dna") + newseq = Seq.new(nil, @seq.seq[int_beg..int_end], :dna) unless newseq.seq.nil? newseq.reverse!.complement! if comp @@ -117,7 +117,7 @@ class Locator when /^(\d+)$/ pos = $1.to_i - 1 - newseq = Seq.new(nil, @seq.seq[pos], "dna") + newseq = Seq.new(nil, @seq.seq[pos], :dna) unless newseq.seq.nil? newseq.reverse!.complement! if comp diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index cf4b6e6..92ce19e 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -87,7 +87,7 @@ class Seq def self.new_bp(record) seq_name = record[:SEQ_NAME] seq = record[:SEQ] - type = record[:SEQ_TYPE] + type = record[:SEQ_TYPE].to_sym if record[:SEQ_TYPE] qual = record[:SCORES] self.new(seq_name, seq, type, qual) @@ -98,9 +98,9 @@ class Seq raise SeqError, "Cannot generate negative oligo length: #{length}" if length <= 0 case type.downcase - when /dna/ then alph = DNA - when /rna/ then alph = RNA - when /protein/ then alph = PROTEIN + when :dna then alph = DNA + when :rna then alph = RNA + when :protein then alph = PROTEIN else raise SeqError, "Unknown sequence type: #{type}" end @@ -140,9 +140,9 @@ class Seq raise SeqError, "Guess failed: sequence is nil" if self.seq.nil? case self.seq[0 ... 100].downcase - when /[flpqie]/ then return "protein" - when /[u]/ then return "rna" - else return "dna" + when /[flpqie]/ then return :protein + when /[u]/ then return :rna + else return :dna end end @@ -190,24 +190,24 @@ class Seq # Method that returns true is a given sequence type is DNA. def is_dna? - self.type == 'dna' + self.type == :dna end # Method that returns true is a given sequence type is RNA. def is_rna? - self.type == 'rna' + self.type == :rna end # Method that returns true is a given sequence type is protein. def is_protein? - self.type == 'protein' + self.type == :protein end # Method to transcribe DNA to RNA. def to_rna raise SeqError, "Cannot transcribe 0 length sequence" if self.length == 0 raise SeqError, "Cannot transcribe sequence type: #{self.type}" unless self.is_dna? - self.type = 'rna' + self.type = :rna self.seq.tr!('Tt','Uu') end @@ -215,14 +215,13 @@ class Seq def to_dna raise SeqError, "Cannot reverse-transcribe 0 length sequence" if self.length == 0 raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna? - - self.type = 'dna' + self.type = :dna self.seq.tr!('Uu','Tt') end # Method to translate a DNA sequence to protein. def translate!(trans_tab = 11) - raise SeqError, "Sequence type must be 'dna' - not #{self.type}" unless self.type == 'dna' + raise SeqError, "Sequence type must be 'dna' - not #{self.type}" unless self.type == :dna raise SeqError, "Sequence length must be a multiplum of 3 - was: #{self.length}" unless (self.length % 3) == 0 case trans_tab @@ -257,7 +256,7 @@ class Seq self.seq = protein self.qual = nil - self.type = "protein" + self.type = :protein self end @@ -392,20 +391,17 @@ class Seq def generate(length, type) raise SeqError, "Cannot generate sequence length < 1: #{length}" if length <= 0 - case type.downcase - when "dna" - alph = DNA - when "rna" - alph = RNA - when "protein" - alph = PROTEIN + case type + when :dna then alph = DNA + when :rna then alph = RNA + when :protein then alph = PROTEIN else raise SeqError, "Unknown sequence type: #{type}" end seq_new = Array.new(length) { alph[rand(alph.size)] }.join("") self.seq = seq_new - self.type = type.downcase + self.type = type seq_new end diff --git a/code_ruby/lib/maasha/seq/patscan.rb b/code_ruby/lib/maasha/seq/patscan.rb index 4e31398..8cdfa8e 100644 --- a/code_ruby/lib/maasha/seq/patscan.rb +++ b/code_ruby/lib/maasha/seq/patscan.rb @@ -153,13 +153,13 @@ module Patscan def pattern_disambiguate case self.type - when 'protein' + when :protein @pattern.gsub!('J', '[IFVLWMAGCY]') @pattern.gsub!('O', '[TSHEDQNKR]') @pattern.gsub!('B', '[DN]') @pattern.gsub!('Z', '[EQ]') @pattern.gsub!('X', '.') - when 'dna' + when :dna @pattern.gsub!('R', '[AG]') @pattern.gsub!('Y', '[CT]') @pattern.gsub!('S', '[GC]') @@ -171,7 +171,7 @@ module Patscan @pattern.gsub!('D', '[AGT]') @pattern.gsub!('B', '[CGT]') @pattern.gsub!('N', '.') - when 'rna' + when :rna else raise SeqError "unknown sequence type: #{self.type}" end diff --git a/code_ruby/test/maasha/test_locator.rb b/code_ruby/test/maasha/test_locator.rb index 052f779..dc0fe14 100755 --- a/code_ruby/test/maasha/test_locator.rb +++ b/code_ruby/test/maasha/test_locator.rb @@ -32,7 +32,7 @@ require 'test/helper' class TestLocator < Test::Unit::TestCase def setup - @seq = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", "dna") + @seq = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", :dna) end test "#new with single position returns correctly" do diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index 4a68d67..eab90cb 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -35,11 +35,11 @@ class TestSeq < Test::Unit::TestCase end test "Seq.new_bp returns correctly" do - record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => "dna", :SCORES => "hhhh"} + record = {:SEQ_NAME => "test", :SEQ => "ATCG", :SEQ_TYPE => :dna, :SCORES => "hhhh"} seq = Seq.new_bp(record) assert_equal("test", seq.seq_name) assert_equal("ATCG", seq.seq) - assert_equal("dna", seq.type) + assert_equal(:dna, seq.type) assert_equal("hhhh", seq.qual) end @@ -48,7 +48,7 @@ class TestSeq < Test::Unit::TestCase end test "#is_dna? with dna sequence type returns true" do - @entry.type = 'dna' + @entry.type = :dna assert(@entry.is_dna? == true) end @@ -57,7 +57,7 @@ class TestSeq < Test::Unit::TestCase end test "#is_rna? with rna sequence type returns true" do - @entry.type = 'rna' + @entry.type = :rna assert(@entry.is_rna? == true) end @@ -66,7 +66,7 @@ class TestSeq < Test::Unit::TestCase end test "#is_protein? with protein sequence type returns true" do - @entry.type = 'protein' + @entry.type = :protein assert_equal(true, @entry.is_protein?) end @@ -76,17 +76,17 @@ class TestSeq < Test::Unit::TestCase test "#type_guess with protein returns protein" do @entry.seq = 'atcatcrFgatcg' - assert_equal('protein', @entry.type_guess) + assert_equal(:protein, @entry.type_guess) end test "#type_guess with rna returns rna" do @entry.seq = 'atcatcrUgatcg' - assert_equal('rna', @entry.type_guess) + assert_equal(:rna, @entry.type_guess) end test "#type_guess with dna returns dna" do @entry.seq = 'atcatcgatcg' - assert_equal('dna', @entry.type_guess) + assert_equal(:dna, @entry.type_guess) end test "#type_guess! without sequence raises" do @@ -96,19 +96,19 @@ class TestSeq < Test::Unit::TestCase test "#type_guess! with protein returns protein" do @entry.seq = 'atcatcrFgatcg' @entry.type_guess! - assert_equal('protein', @entry.type) + assert_equal(:protein, @entry.type) end test "#type_guess! with rna returns rna" do @entry.seq = 'atcatcrUgatcg' @entry.type_guess! - assert_equal('rna', @entry.type) + assert_equal(:rna, @entry.type) end test "#type_guess! with dna returns dna" do @entry.seq = 'atcatcgatcg' @entry.type_guess! - assert_equal('dna', @entry.type) + assert_equal(:dna, @entry.type) end test "#length returns corretly" do @@ -122,51 +122,51 @@ class TestSeq < Test::Unit::TestCase end test "#to_rna with no sequence raises" do - @entry.type = 'dna' + @entry.type = :dna assert_raise(SeqError) { @entry.to_rna } end test "#to_rna with bad type raises" do @entry.seq = 'ATCG' - @entry.type = 'rna' + @entry.type = :rna assert_raise(SeqError) { @entry.to_rna } end test "#to_rna transcribes correctly" do @entry.seq = 'ATCGatcg' - @entry.type = 'dna' + @entry.type = :dna assert_equal("AUCGaucg", @entry.to_rna) end test "#to_rna changes entry type to rna" do @entry.seq = 'ATCGatcg' - @entry.type = 'dna' + @entry.type = :dna @entry.to_rna - assert_equal("rna", @entry.type) + assert_equal(:rna, @entry.type) end test "#to_dna with no sequence raises" do - @entry.type = 'rna' + @entry.type = :rna assert_raise(SeqError) { @entry.to_dna } end test "#to_dna with bad type raises" do @entry.seq = 'AUCG' - @entry.type = 'dna' + @entry.type = :dna assert_raise(SeqError) { @entry.to_dna } end test "#to_dna transcribes correctly" do @entry.seq = 'AUCGaucg' - @entry.type = 'rna' + @entry.type = :rna assert_equal("ATCGatcg", @entry.to_dna) end test "#to_dna changes entry type to dna" do @entry.seq = 'AUCGaucg' - @entry.type = 'rna' + @entry.type = :rna @entry.to_dna - assert_equal("dna", @entry.type) + assert_equal(:dna, @entry.type) end test "#to_bp returns correct record" do @@ -249,19 +249,19 @@ class TestSeq < Test::Unit::TestCase end test "#complement with no sequence raises" do - @entry.type = 'dna' + @entry.type = :dna assert_raise(SeqError) { @entry.complement } end test "#complement with bad type raises" do @entry.seq = 'ATCG' - @entry.type = 'protein' + @entry.type = :protein assert_raise(SeqError) { @entry.complement } end test "#complement for DNA is correct" do @entry.seq = 'ATCGatcg' - @entry.type = 'dna' + @entry.type = :dna comp = @entry.complement assert_equal("TAGCtagc", comp.seq) assert_equal("ATCGatcg", @entry.seq) @@ -269,32 +269,32 @@ class TestSeq < Test::Unit::TestCase test "#complement for RNA is correct" do @entry.seq = 'AUCGaucg' - @entry.type = 'rna' + @entry.type = :rna comp = @entry.complement assert_equal("UAGCuagc", comp.seq) assert_equal("AUCGaucg", @entry.seq) end test "#complement! with no sequence raises" do - @entry.type = 'dna' + @entry.type = :dna assert_raise(SeqError) { @entry.complement! } end test "#complement! with bad type raises" do @entry.seq = 'ATCG' - @entry.type = 'protein' + @entry.type = :protein assert_raise(SeqError) { @entry.complement! } end test "#complement! for DNA is correct" do @entry.seq = 'ATCGatcg' - @entry.type = 'dna' + @entry.type = :dna assert_equal("TAGCtagc", @entry.complement!.seq) end test "#complement! for RNA is correct" do @entry.seq = 'AUCGaucg' - @entry.type = 'rna' + @entry.type = :rna assert_equal("UAGCuagc", @entry.complement!.seq) end @@ -306,8 +306,8 @@ class TestSeq < Test::Unit::TestCase end test "#generate with length < 1 raises" do - assert_raise(SeqError) { @entry.generate(-10, "dna") } - assert_raise(SeqError) { @entry.generate(0, "dna") } + assert_raise(SeqError) { @entry.generate(-10, :dna) } + assert_raise(SeqError) { @entry.generate(0, :dna) } end test "#generate with bad type raises" do @@ -315,8 +315,8 @@ class TestSeq < Test::Unit::TestCase end test "#generate with ok type dont raise" do - %w[dna DNA rna RNA protein Protein].each do |type| - assert_nothing_raised { @entry.generate(10, type) } + %w[dna rna protein].each do |type| + assert_nothing_raised { @entry.generate(10, type.to_sym) } end end @@ -335,15 +335,15 @@ class TestSeq < Test::Unit::TestCase test "#<< with different types raises" do @entry.seq = "atcg" - assert_raise(SeqError) { @entry << Seq.new("test", "atcg", "dna") } + assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna) } end test "#<< with missing qual in one entry raises" do @entry.seq = "atcg" - @entry.type = "dna" - assert_raise(SeqError) { @entry << Seq.new("test", "atcg", "dna", "IIII") } + @entry.type = :dna + assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna, "IIII") } @entry.qual = "IIII" - assert_raise(SeqError) { @entry << Seq.new("test", "atcg", "dna") } + assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna) } end test "#<< with nil qual in both entries dont raise" do @@ -353,9 +353,9 @@ class TestSeq < Test::Unit::TestCase test "#<< with qual in both entries dont raise" do @entry.seq = "atcg" - @entry.type = "dna" + @entry.type = :dna @entry.qual = "IIII" - assert_nothing_raised { @entry << Seq.new("test", "atcg", "dna", "IIII") } + assert_nothing_raised { @entry << Seq.new("test", "atcg", :dna, "IIII") } end test "#<< without qual returns correctly" do @@ -366,9 +366,9 @@ class TestSeq < Test::Unit::TestCase test "#<< with qual returns correctly" do @entry.seq = "atcg" - @entry.type = "dna" + @entry.type = :dna @entry.qual = "HHHH" - @entry << Seq.new("test", "ATCG", "dna", "IIII") + @entry << Seq.new("test", "ATCG", :dna, "IIII") assert_equal("atcgATCG", @entry.seq) assert_equal("HHHHIIII", @entry.qual) end -- 2.39.2