From: martinahansen Date: Thu, 9 Sep 2010 08:24:40 +0000 (+0000) Subject: added progress_meter biopiece X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ac93284e7464b3e30619c2946623e8254f3b3788;p=biopieces.git added progress_meter biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@1078 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/progress_meter b/bp_bin/progress_meter new file mode 100755 index 0000000..4e03cb4 --- /dev/null +++ b/bp_bin/progress_meter @@ -0,0 +1,52 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Counts records in the stream and prints a dot to stderr for every N records (N = --count, default 1000), starting a new line with the running record count every 100 dots. 
+ +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +require 'biopieces' + +casts = [] +casts << {:long=>'no_stream', :short => 'x', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil} +casts << {:long=>'count', :short => 'c', :type => 'uint', :mandatory => false, :default => 1000, :allowed => nil, :disallowed => '0'} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) + +bp.each_with_index do |record, count| + bp.puts record unless options.has_key? :no_stream + $stderr.printf "\n% 8d ", count if (count % (options[:count] * 100)) == 0 + $stderr.print "." if (count % options[:count]) == 0 +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/code_ruby/Maasha/lib/biopieces.rb b/code_ruby/Maasha/lib/biopieces.rb index bb51080..ca9de4e 100644 --- a/code_ruby/Maasha/lib/biopieces.rb +++ b/code_ruby/Maasha/lib/biopieces.rb @@ -38,14 +38,14 @@ require 'pp' # of lines with a key/value pair separated by a colon and a white space ': '. # Each record is separated by a line with three dashes '---'. class Biopieces + include Enumerable + # Initialize a Biopiece and write the status to file. # Options are for testing purposes only. def initialize(test=nil, input=STDIN, output=STDOUT) @test = test @input = input @output = output - @status = Status.new - @status.set unless @test end # Check the integrity of a list of casts, followed by parsion options from argv @@ -102,7 +102,7 @@ class Biopieces pid = $$ path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_") Dir.mkdir(path) - @status.set_tmpdir(path) + Status.new.set_tmpdir(path) path end end @@ -490,12 +490,16 @@ class Status # Append the a temporary directory path to the status file. 
def set_tmpdir(tmpdir_path) + status = "" + File.open(path, mode="r") do |fh| - fh.read.chomp + status = fh.read.chomp end + status = "#{status};#{tmpdir_path}\n" + File.open(path, mode="w") do |fh| - fh << [status, tmpdir_path].join(";") + "\n" + fh << status end end diff --git a/code_ruby/Maasha/lib/fasta.rb b/code_ruby/Maasha/lib/fasta.rb index 31c7529..bd551be 100644 --- a/code_ruby/Maasha/lib/fasta.rb +++ b/code_ruby/Maasha/lib/fasta.rb @@ -85,6 +85,14 @@ class Fasta entry end + # TODO - this should be some custom to_s method instead. + def puts(record) + if record.has_key? :SEQ_NAME and record.has_key? :SEQ + @io.print ">#{record[:SEQ_NAME]}\n" + @io.print "#{record[:SEQ]}\n" + end + end + private # Helper method to return an ios to a file that may be zipped in which case diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index e55a383..2dd9c5f 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -8,6 +8,13 @@ class SeqError < StandardError; end class Seq attr_accessor :seq_name, :seq, :type, :qual + def initialize(seq_name = nil, seq = nil, type = nil, qual = nil) + @seq_name = seq_name + @seq = seq + @type = type + @qual = qual + end + def length self.seq.nil? ? 0 : self.seq.length end @@ -15,22 +22,22 @@ class Seq alias len length # Method that returns true is a given sequence type is DNA. - def is_dna + def is_dna? self.type == 'dna' end - def is_rna + def is_rna? self.type == 'rna' end - def is_protein + def is_protein? self.type == 'protein' end # Method to transcribe DNA to RNA. def to_rna raise SeqError, "Cannot transcribe 0 length sequence" if self.length == 0 - raise SeqError, "Cannot transcribe sequence type: #{self.type}" unless self.is_dna + raise SeqError, "Cannot transcribe sequence type: #{self.type}" unless self.is_dna? self.type = 'rna' self.seq.tr!('Tt','Uu') end @@ -38,7 +45,7 @@ class Seq # Method to reverse-transcribe RNA to DNA. 
def to_dna raise SeqError, "Cannot reverse-transcribe 0 length sequence" if self.length == 0 - raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna + raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna? self.type = 'dna' self.seq.tr!('Uu','Tt') end @@ -54,13 +61,32 @@ class Seq record end + # Method that given a Seq entry returns a FASTA entry (a string). + def to_fasta(wrap = nil) + raise SeqError, "Missing seq_name" if self.seq_name.nil? + raise SeqError, "Missing seq" if self.seq.nil? + + seq_name = self.seq_name + seq = self.seq + + unless wrap.nil? + seq.gsub! /(.{#{wrap}})/ do |match| + match << "\n" + end + + seq.chomp! + end + + ">#{seq_name}\n#{seq}\n" + end + # Method that complements sequence including ambiguity codes. def complement raise SeqError, "Cannot complement 0 length sequence" if self.length == 0 - if self.is_dna + if self.is_dna? self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' ) - elsif self.is_rna + elsif self.is_rna? self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' ) else raise SeqError, "Cannot complement sequence type: #{self.type}" diff --git a/code_ruby/Maasha/test/test_seq.rb b/code_ruby/Maasha/test/test_seq.rb index 5fcea28..52f14af 100755 --- a/code_ruby/Maasha/test/test_seq.rb +++ b/code_ruby/Maasha/test/test_seq.rb @@ -16,30 +16,30 @@ class TestSeq < Test::Unit::TestCase # end def test_Seq_is_dna_with_no_sequence_type_returns_false - assert(@entry.is_dna == false) + assert(@entry.is_dna? == false) end def test_Seq_is_dna_with_dna_sequence_type_returns_true @entry.type = 'dna' - assert(@entry.is_dna == true) + assert(@entry.is_dna? == true) end def test_Seq_is_rna_with_no_sequence_type_returns_false - assert(@entry.is_rna == false) + assert(@entry.is_rna? 
== false) end def test_Seq_is_rna_with_rna_sequence_type_returns_true @entry.type = 'rna' - assert(@entry.is_rna == true) + assert(@entry.is_rna? == true) end def test_Seq_is_protein_with_no_sequence_type_returns_false - assert(@entry.is_protein == false) + assert(@entry.is_protein? == false) end def test_Seq_is_protein_with_protein_sequence_type_returns_true @entry.type = 'protein' - assert(@entry.is_protein == true) + assert(@entry.is_protein? == true) end def test_Sequence_length_is_correct @@ -111,6 +111,16 @@ class TestSeq < Test::Unit::TestCase assert_raise(SeqError) { @entry.to_bp } end + def test_Seq_to_fasta_returns_correct_entry + entry = Seq.new( "test", "ATCG" ) + assert_equal(">test\nATCG\n", entry.to_fasta) + end + + def test_Seq_to_fasta_wraps_correctly + entry = Seq.new( "test", "ATCG" ) + assert_equal(">test\nAT\nCG\n", entry.to_fasta(2)) + end + def test_Seq_complement_raises_if_no_sequence @entry.type = 'dna' assert_raise(SeqError) { @entry.complement }