--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Counts records in the stream and prints a dot to stderr for every `count` records, plus a running record count every 100 dots.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+
+casts = []
+casts << {:long=>'no_stream', :short => 'x', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
+casts << {:long=>'count', :short => 'c', :type => 'uint', :mandatory => false, :default => 1000, :allowed => nil, :disallowed => '0'}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+bp.each_with_index do |record, count|
+ bp.puts record unless options.has_key? :no_stream
+ $stderr.printf "\n% 8d ", count if (count % (options[:count] * 100)) == 0
+ $stderr.print "." if (count % options[:count]) == 0
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
# of lines with a key/value pair separated by a colon and a white space ': '.
# Each record is separated by a line with three dashes '---'.
class Biopieces
+ include Enumerable
+
# Initialize a Biopiece.
# Options are for testing purposes only.
#
# test   - stored in @test (defaults to nil).
# input  - stored in @input (defaults to STDIN).
# output - stored in @output (defaults to STDOUT).
#
# NOTE(review): this hunk removes the Status setup from the initializer, so
# the status is no longer written from here.
def initialize(test=nil, input=STDIN, output=STDOUT)
@test = test
@input = input
@output = output
- @status = Status.new
- @status.set unless @test
end
# Check the integrity of a list of casts, followed by parsing options from argv
pid = $$
path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_")
Dir.mkdir(path)
- @status.set_tmpdir(path)
+ Status.new.set_tmpdir(path)
path
end
end
# Append a temporary directory path to the status file.
#
# tmpdir_path - path that is appended to the existing status after a ';'.
#
# Reads the file at `path`, drops its trailing line terminator, and rewrites
# the file with the old content followed by ";<tmpdir_path>" and a newline.
# NOTE(review): `path` is not defined in this hunk - presumably a helper on
# the same class returning the status file location; confirm.
def set_tmpdir(tmpdir_path)
+ status = ""
+
File.open(path, mode="r") do |fh|
- fh.read.chomp
+ status = fh.read.chomp
end
+ status = "#{status};#{tmpdir_path}\n"
+
File.open(path, mode="w") do |fh|
- fh << [status, tmpdir_path].join(";") + "\n"
+ fh << status
end
end
entry
end
+ # TODO - this should be some custom to_s method instead.
+ def puts(record)
+ if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ @io.print ">#{record[:SEQ_NAME]}\n"
+ @io.print "#{record[:SEQ]}\n"
+ end
+ end
+
private
# Helper method to return an ios to a file that may be zipped in which case
class Seq
attr_accessor :seq_name, :seq, :type, :qual
+ def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
+ @seq_name = seq_name
+ @seq = seq
+ @type = type
+ @qual = qual
+ end
+
# Returns the length of seq, or 0 when seq is nil.
def length
self.seq.nil? ? 0 : self.seq.length
end
# len is a shorter alias for length.
alias len length
# Method that returns true if a given sequence type is DNA.
# The check is an exact comparison of type against the string 'dna'.
- def is_dna
+ def is_dna?
self.type == 'dna'
end
# Method that returns true if the sequence type is RNA.
# The check is an exact comparison of type against the string 'rna'.
- def is_rna
+ def is_rna?
self.type == 'rna'
end
# Method that returns true if the sequence type is protein.
# The check is an exact comparison of type against the string 'protein'.
- def is_protein
+ def is_protein?
self.type == 'protein'
end
# Method to transcribe DNA to RNA.
#
# Sets type to 'rna' and replaces T/t with U/u in seq, modifying seq in place.
# Raises SeqError if the sequence length is 0 or the type is not 'dna'.
# NOTE(review): the return value is that of String#tr!, which is nil when no
# character was replaced - callers should not rely on it.
def to_rna
raise SeqError, "Cannot transcribe 0 length sequence" if self.length == 0
- raise SeqError, "Cannot transcribe sequence type: #{self.type}" unless self.is_dna
+ raise SeqError, "Cannot transcribe sequence type: #{self.type}" unless self.is_dna?
self.type = 'rna'
self.seq.tr!('Tt','Uu')
end
# Method to reverse-transcribe RNA to DNA.
#
# Sets type to 'dna' and replaces U/u with T/t in seq, modifying seq in place.
# Raises SeqError if the sequence length is 0 or the type is not 'rna'.
# NOTE(review): the return value is that of String#tr!, which is nil when no
# character was replaced - callers should not rely on it.
def to_dna
raise SeqError, "Cannot reverse-transcribe 0 length sequence" if self.length == 0
- raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna
+ raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna?
self.type = 'dna'
self.seq.tr!('Uu','Tt')
end
record
end
+ # Method that given a Seq entry returns a FASTA entry (a string).
+ def to_fasta(wrap = nil)
+ raise SeqError, "Missing seq_name" if self.seq_name.nil?
+ raise SeqError, "Missing seq" if self.seq.nil?
+
+ seq_name = self.seq_name
+ seq = self.seq
+
+ unless wrap.nil?
+ seq.gsub! /(.{#{wrap}})/ do |match|
+ match << "\n"
+ end
+
+ seq.chomp!
+ end
+
+ ">#{seq_name}\n#{seq}\n"
+ end
+
# Method that complements sequence including ambiguity codes.
def complement
raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
- if self.is_dna
+ if self.is_dna?
self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
- elsif self.is_rna
+ elsif self.is_rna?
self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
else
raise SeqError, "Cannot complement sequence type: #{self.type}"
# end
# is_dna? must return false when this test assigns no type to @entry
# (@entry itself comes from setup, which is not shown here).
def test_Seq_is_dna_with_no_sequence_type_returns_false
- assert(@entry.is_dna == false)
+ assert(@entry.is_dna? == false)
end
# is_dna? must return true once the type has been set to 'dna'.
def test_Seq_is_dna_with_dna_sequence_type_returns_true
@entry.type = 'dna'
- assert(@entry.is_dna == true)
+ assert(@entry.is_dna? == true)
end
# is_rna? must return false when this test assigns no type to @entry
# (@entry itself comes from setup, which is not shown here).
def test_Seq_is_rna_with_no_sequence_type_returns_false
- assert(@entry.is_rna == false)
+ assert(@entry.is_rna? == false)
end
# is_rna? must return true once the type has been set to 'rna'.
def test_Seq_is_rna_with_rna_sequence_type_returns_true
@entry.type = 'rna'
- assert(@entry.is_rna == true)
+ assert(@entry.is_rna? == true)
end
# is_protein? must return false when this test assigns no type to @entry
# (@entry itself comes from setup, which is not shown here).
def test_Seq_is_protein_with_no_sequence_type_returns_false
- assert(@entry.is_protein == false)
+ assert(@entry.is_protein? == false)
end
# is_protein? must return true once the type has been set to 'protein'.
def test_Seq_is_protein_with_protein_sequence_type_returns_true
@entry.type = 'protein'
- assert(@entry.is_protein == true)
+ assert(@entry.is_protein? == true)
end
def test_Sequence_length_is_correct
assert_raise(SeqError) { @entry.to_bp }
end
+ def test_Seq_to_fasta_returns_correct_entry
+ entry = Seq.new( "test", "ATCG" )
+ assert_equal(">test\nATCG\n", entry.to_fasta)
+ end
+
+ def test_Seq_to_fasta_wraps_correctly
+ entry = Seq.new( "test", "ATCG" )
+ assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
+ end
+
def test_Seq_complement_raises_if_no_sequence
@entry.type = 'dna'
assert_raise(SeqError) { @entry.complement }