--- /dev/null
+#!/usr/bin/env perl
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Read 454 entries from fasta and quality file.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use warnings;
+use strict;
+use Data::Dumper;
+use Maasha::Biopieces;
+use Maasha::Filesys;
+use Maasha::Fasta;
+use Maasha::Fastq;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Option casts: FASTA file, matching quality file, optional entry limit,
+# optional score conversion and optional soft masking with a cutoff.
+my $options = Maasha::Biopieces::parse_options(
+    [
+        { long => 'data_in',     short => 'i', type => 'file!', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+        { long => 'qual_in',     short => 'q', type => 'file!', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+        { long => 'num',         short => 'n', type => 'uint',  mandatory => 'no',  default => undef, allowed => undef, disallowed => '0' },
+        { long => 'convert2dec', short => 'c', type => 'flag',  mandatory => 'no',  default => undef, allowed => undef, disallowed => undef },
+        { long => 'cutoff',      short => 'C', type => 'int',   mandatory => 'no',  default => 20,    allowed => undef, disallowed => undef },
+        { long => 'soft_mask',   short => 's', type => 'flag',  mandatory => 'no',  default => undef, allowed => undef, disallowed => undef },
+    ]
+);
+
+my $in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+my $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+# Records already in the stream are passed through untouched.
+while ( my $passed = Maasha::Biopieces::get_record( $in ) ) {
+    Maasha::Biopieces::put_record( $passed, $out );
+}
+
+if ( $options->{ 'data_in' } )
+{
+    my $fasta_fh = Maasha::Filesys::file_read_open( $options->{ 'data_in' } );
+    my $qual_fh  = Maasha::Filesys::file_read_open( $options->{ 'qual_in' } );
+    my $emitted  = 0;    # number of records written so far
+
+    # The FASTA file drives the loop; one quality entry is read per FASTA entry.
+    while ( my $fasta = Maasha::Fasta::get_entry( $fasta_fh ) )
+    {
+        my $qual = get_qual( $qual_fh );
+
+        check_names( $fasta, $qual );
+
+        my $entry = {
+            SEQ_NAME => $fasta->[ 0 ],
+            SEQ      => $fasta->[ 1 ],
+            SCORES   => $qual->[ 1 ],
+        };
+
+        # The hash elements are passed directly (not copies), as in the call's
+        # original form, since the return value is not used.
+        if ( $options->{ 'soft_mask' } ) {
+            Maasha::Fastq::softmask_solexa_str( $entry->{ 'SEQ' }, $entry->{ 'SCORES' }, $options->{ 'cutoff' } );
+        }
+
+        if ( $options->{ 'convert2dec' } ) {
+            $entry->{ 'SCORES' } = Maasha::Fastq::solexa_str2dec_str( $entry->{ 'SCORES' } );
+        }
+
+        Maasha::Biopieces::put_record( $entry, $out );
+
+        $emitted++;
+
+        # Stop once the requested number of entries has been written.
+        last if $options->{ "num" } and $emitted == $options->{ "num" };
+    }
+
+    close $fasta_fh;
+    close $qual_fh;
+}
+
+Maasha::Biopieces::close_stream( $in );
+Maasha::Biopieces::close_stream( $out );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_qual
+{
+    # Martin A. Hansen, May 2010.
+
+    # Get the next 454 quality entry from an open file.
+    #
+    # The file is read in chunks terminated by "\n>"; whitespace-only
+    # chunks are skipped. The first non-empty line of a chunk is the entry
+    # name and the remainder holds decimal scores separated by spaces
+    # and/or line breaks (LF or CRLF).
+
+    my ( $fh,   # file handle
+       ) = @_;
+
+    # Returns list ( name, scores ), or an array ref in scalar context.
+    # Returns nothing when there are no more entries.
+
+    my ( $block, $name, $qual, $scores, $entry );
+
+    {
+        local $/ = "\n>";
+
+        while ( $block = <$fh> )
+        {
+            chomp $block;
+
+            last if $block !~ /^\s+$/;
+        }
+    }
+
+    return if not defined $block;
+
+    # Split header line from scores using captures instead of $1/$' so a
+    # failed match can never pick up stale values from an earlier match.
+    # An entry consisting of a header line only yields an empty score string.
+    if ( $block =~ /^>?([^\n]+)(?:\n(.*))?\z/ms )
+    {
+        $name = $1;
+        $qual = defined $2 ? $2 : '';
+    }
+    else
+    {
+        Maasha::Common::error( qq(Bad quality entry: "$block") );
+    }
+
+    $qual =~ s/\n\z//;
+
+    $name =~ tr/\r//d;
+
+    # Collapse every run of separators to a single ; - the former
+    # tr + s/;;/;/g left a stray ; behind for runs of three or more
+    # (e.g. "40 \r\n30"), and ; is itself a valid score character.
+    $qual =~ s/[ \n\r;]+/;/g;
+    $qual =~ s/^;|;$//g;
+
+    $scores = Maasha::Fastq::dec_str2solexa_str( $qual );
+
+    $entry = [ $name, $scores ];
+
+    return wantarray ? @{ $entry } : $entry;
+}
+
+
+sub check_names
+{
+    # Martin A. Hansen, May 2010.
+
+    # Check if the names of the fasta and qual entries match
+    # and raise an error if they don't. Only the part of each
+    # name before the first space is compared.
+    #
+    # A missing entry (e.g. the quality file has fewer entries than
+    # the fasta file) or a missing name is compared as an empty
+    # name, so the mismatch is reported without "uninitialized
+    # value" warnings.
+
+    my ( $fasta,   # fasta entry
+         $qual,    # qual entry
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $f_full, $q_full, $f_name, $q_name );
+
+    $f_full = ( defined $fasta and defined $fasta->[ 0 ] ) ? $fasta->[ 0 ] : '';
+    $q_full = ( defined $qual  and defined $qual->[ 0 ] )  ? $qual->[ 0 ]  : '';
+
+    ( $f_name ) = $f_full =~ /^([^ ]+)/;
+    ( $q_name ) = $q_full =~ /^([^ ]+)/;
+
+    $f_name = '' if not defined $f_name;
+    $q_name = '' if not defined $q_name;
+
+    # NOTE(review): Maasha::Common is not loaded explicitly by this script;
+    # presumably it is pulled in by one of the other Maasha modules - confirm.
+    Maasha::Common::error( qq(names don't match "$f_full" ne "$q_full") ) if $f_name ne $q_name;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Perl runs a BEGIN block as soon as it has been compiled, i.e. before the
+# main body of the script; here it calls Maasha::Biopieces::status_set().
+# NOTE(review): presumably this records the start of the run - confirm in
+# Maasha::Biopieces.
+BEGIN
+{
+ Maasha::Biopieces::status_set();
+}
+
+
+# Perl runs an END block when the interpreter exits; here it calls
+# Maasha::Biopieces::status_log().
+END
+{
+ Maasha::Biopieces::status_log();
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+#!/usr/bin/env perl
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Write sequences and scores from stream to FASTA and QUAL files.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use warnings;
+use strict;
+use Maasha::Biopieces;
+use Maasha::Fastq;
+use Maasha::Fasta;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Option casts: FASTA output file, quality output file, optional
+# compression, and a flag to suppress passing records downstream.
+my $options = Maasha::Biopieces::parse_options(
+    [
+        { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no',  default => undef, allowed => undef, disallowed => undef },
+        { long => 'data_out',  short => 'o', type => 'file', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+        { long => 'qual_out',  short => 'q', type => 'file', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef },
+        { long => 'compress',  short => 'Z', type => 'flag', mandatory => 'no',  default => undef, allowed => undef, disallowed => undef },
+    ]
+);
+
+my $in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+my $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+my $fasta_fh = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+my $qual_fh  = Maasha::Biopieces::write_stream( $options->{ "qual_out" }, $options->{ "compress" } );
+
+while ( my $record = Maasha::Biopieces::get_record( $in ) )
+{
+    my $entry = Maasha::Fasta::biopiece2fasta( $record );
+
+    # Only records that convert to a FASTA entry are written to the two files.
+    if ( $entry )
+    {
+        Maasha::Fasta::put_entry( $entry, $fasta_fh );
+
+        qual_put_entry( $entry->[ 0 ], $record->{ 'SCORES' }, $qual_fh );
+    }
+
+    # Every record is passed on downstream unless --no_stream is given.
+    unless ( $options->{ "no_stream" } ) {
+        Maasha::Biopieces::put_record( $record, $out );
+    }
+}
+
+close $fasta_fh;
+close $qual_fh;
+
+Maasha::Biopieces::close_stream( $in );
+Maasha::Biopieces::close_stream( $out );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub qual_put_entry
+{
+    # Martin A. Hansen, May 2010.
+
+    # Write one entry in 454 quality format: a ">name" header line
+    # followed by a line of space separated scores.
+
+    my ( $name,        # entry name
+         $score_str,   # scores in Solexa format.
+         $fh,          # output filehandle
+       ) = @_;
+
+    # Returns nothing.
+
+    my $dec_scores = Maasha::Fastq::solexa_str2dec_str( $score_str );
+
+    # The converted string is ; separated - the quality format uses spaces.
+    ( my $spaced = $dec_scores ) =~ tr/;/ /;
+
+    print {$fh} ">$name\n";
+    print {$fh} "$spaced\n";
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Perl runs a BEGIN block as soon as it has been compiled, i.e. before the
+# main body of the script; here it calls Maasha::Biopieces::status_set().
+# NOTE(review): presumably this records the start of the run - confirm in
+# Maasha::Biopieces.
+BEGIN
+{
+ Maasha::Biopieces::status_set();
+}
+
+
+# Perl runs an END block when the interpreter exits; here it calls
+# Maasha::Biopieces::status_log().
+END
+{
+ Maasha::Biopieces::status_log();
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
### Here we add the Biopieces Ruby libraries to RUBYLIB.
-export RUBYLIB="$RUBYLIB:$BP_RUBY"
+export RUBYLIB="$RUBYLIB:$BP_RUBY/Maasha/lib"
### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
}
+sub dec_str2solexa_str
+{
+    # Martin A. Hansen, May 2010.
+
+    # Converts a ; separated string of decimal scores to a score
+    # string: every run of one or two digits, together with an
+    # optional trailing ;, is replaced by the value dec2solexa()
+    # returns for that number. All other characters are left as is.
+
+    my ( $scores,   # Decimal score string
+       ) = @_;
+
+    # Returns a string.
+
+    $scores =~ s{ (\d{1,2}) ;? }{ dec2solexa( $1 ) }xge;
+
+    return $scores;
+}
+
+
sub get_entry
{
# Martin A. Hansen, July 2009.
--- /dev/null
+require 'optparse'
+require 'open3'
+require 'pp'
+
+# Error class for all exceptions to do with option casts.
+class CastError < StandardError
+end
+
+# Class using OptionParser to parse command line options according to a list of
+# casts. Each cast prescribes the long and short name of the option, the type,
+# if it is mandatory, the default value, and allowed and disallowed values. An
+# optional list of extra casts can be supplied, and the integrity of the casts
+# are checked. Following the command line parsing, the options are checked
+# according to the casts.
+class Biopieces
+ TYPES = %w[flag string list int uint float file file! files files! dir dir! genome]
+ MANDATORY = %w[long short type mandatory default allowed disallowed]
+ REGEX_LIST = /^(list|files|files!)$/
+ REGEX_INT = /^(int|uint)$/
+ REGEX_STRING = /^(string|file|file!|dir|dir!|genome)$/
+
+ # Check the integrity of a list of casts, followed by parsion options from argv
+ # and finally checking the options according to the casts. Returns nil if
+ # argv is empty, otherwise an options hash.
+ def parse(argv,casts=[],script_path=$0)
+ @casts = casts
+ @script_path = script_path
+
+ cast_ubiquitous
+ cast_check
+
+ @options = {}
+
+ options_template = OptionParser.new do |option|
+ @casts.each do |cast|
+ if cast[:type] == 'flag'
+ option.on("-#{cast[:short]}", "--#{cast[:long]}") do |o|
+ @options[cast[:long]] = o
+ end
+ elsif cast[:type] =~ REGEX_LIST
+ option.on( "-#{cast[:short]}", "--#{cast[:long]} A", Array) do |a|
+ @options[cast[:long]] = a
+ end
+ elsif cast[:type] =~ REGEX_INT
+ option.on("-#{cast[:short]}", "--#{cast[:long]} I", Integer) do |i|
+ @options[cast[:long]] = i
+ end
+ elsif cast[:type] =~ REGEX_STRING
+ option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s|
+ @options[cast[:long]] = s
+ end
+ elsif cast[:type] == 'float'
+ option.on("-#{cast[:short]}", "--#{cast[:long]} F", Float) do |f|
+ @options[cast[:long]] = f
+ end
+ else
+ raise ArgumentError, "Unknown option type: '#{cast[:type]}'"
+ end
+ end
+ end
+
+ options_template.parse!(argv)
+
+ if print_usage_full?
+ print_usage_and_exit(true)
+ return # break for unit testing.
+ elsif print_usage_short?
+ print_usage_and_exit
+ return # break for unit testing.
+ end
+
+ options_default
+ options_glob
+ options_check
+
+ @options
+ end
+
+ private
+
+ # Given the script name determine the path of the wiki file with the usage info.
+ def wiki_path
+ path = ENV["BP_DIR"] + "/bp_usage/" + File.basename(@script_path, ".rb") + ".wiki"
+ raise "No such wiki file: #{path}" unless File.file? path
+
+ path
+ end
+
+ # Add ubiquitous options casts.
+ def cast_ubiquitous
+ @casts << {:long => 'help', :short => '?', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
+ @casts << {:long => 'stream_in', :short => 'I', :type => 'files!', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
+ @casts << {:long => 'stream_out', :short => 'O', :type => 'file', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
+ @casts << {:long => 'verbose', :short => 'v', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil}
+ end
+
+ # Check integrity of the casts.
+ def cast_check
+ cast_check_keys
+ cast_check_values
+ cast_check_duplicates
+ end
+
+ # Check if all mandatory keys are present in casts and raise if not.
+ def cast_check_keys
+ @casts.each do |cast|
+ MANDATORY.each do |mandatory|
+ raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym
+ end
+ end
+ end
+
+ # Check if all values in casts are valid.
+ def cast_check_values
+ @casts.each do |cast|
+ cast_check_val_long(cast)
+ cast_check_val_short(cast)
+ cast_check_val_type(cast)
+ cast_check_val_mandatory(cast)
+ cast_check_val_default(cast)
+ cast_check_val_allowed(cast)
+ cast_check_val_disallowed(cast)
+ end
+ end
+
+ # Check if the values to long are legal and raise if not.
+ def cast_check_val_long(cast)
+ unless cast[:long].is_a? String and cast[:long].length > 1
+ raise CastError, "Illegal cast of long: '#{cast[:long]}'"
+ end
+ end
+
+ # Check if the values to short are legal and raise if not.
+ def cast_check_val_short(cast)
+ unless cast[:short].is_a? String and cast[:short].length == 1
+ raise CastError, "Illegal cast of short: '#{cast[:short]}'"
+ end
+ end
+
+ # Check if values to type are legal and raise if not.
+ def cast_check_val_type(cast)
+ type_hash = {}
+ TYPES.each do |type|
+ type_hash[type] = true
+ end
+
+ unless type_hash.has_key? cast[:type]
+ raise CastError, "Illegal cast of type: '#{cast[:type]}'"
+ end
+ end
+
+ # Check if values to mandatory are legal and raise if not.
+ def cast_check_val_mandatory(cast)
+ unless cast[:mandatory] == true or cast[:mandatory] == false
+ raise CastError, "Illegal cast of mandatory: '#{cast[:mandatory]}'"
+ end
+ end
+
+ # Check if values to default are legal and raise if not.
+ def cast_check_val_default(cast)
+ unless cast[:default].nil? or
+ cast[:default].is_a? String or
+ cast[:default].is_a? Integer or
+ cast[:default].is_a? Float
+ raise CastError, "Illegal cast of default: '#{cast[:default]}'"
+ end
+ end
+
+ # Check if values to allowed are legal and raise if not.
+ def cast_check_val_allowed(cast)
+ unless cast[:allowed].is_a? String or cast[:allowed].nil?
+ raise CastError, "Illegal cast of allowed: '#{cast[:allowed]}'"
+ end
+ end
+
+ # Check if values to disallowed are legal and raise if not.
+ def cast_check_val_disallowed(cast)
+ unless cast[:disallowed].is_a? String or cast[:disallowed].nil?
+ raise CastError, "Illegal cast of disallowed: '#{cast[:disallowed]}'"
+ end
+ end
+
+ # Check cast for duplicate long or short options names.
+ def cast_check_duplicates
+ check_hash = {}
+ @casts.each do |cast|
+ raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long]
+ raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? cast[:short]
+ check_hash[cast[:long]] = true
+ check_hash[cast[:short]] = true
+ end
+ end
+
+ # Check if full "usage info" should be printed.
+ def print_usage_full?
+ @options["help"]
+ end
+
+ # Check if short "usage info" should be printed.
+ def print_usage_short?
+ if not $stdin.tty?
+ return false
+ elsif @options["stream_in"]
+ return false
+ elsif @options["data_in"]
+ return false
+ elsif wiki_path =~ /^(list_biopieces|list_genomes|list_mysql_databases|biostat)$/ # TODO get rid of this!
+ return false
+ else
+ return true
+ end
+ end
+
+ # Print usage info by Calling an external script 'print_wiki'
+ # using a system() call and exit. An optional 'full' flag
+ # outputs the full usage info.
+ def print_usage_and_exit(full=nil)
+ if full
+ system("print_wiki --data_in #{wiki_path} --help")
+ else
+ system("print_wiki --data_in #{wiki_path}")
+ end
+
+ raise "Failed printing wiki: #{wiki_path}" unless $?.success?
+
+ exit
+ end
+
+ # Set default options value from cast unless a value is set.
+ def options_default
+ @casts.each do |cast|
+ if cast[:default]
+ @options[cast[:long]] = cast[:default] unless @options.has_key? cast[:long]
+ end
+ end
+ end
+
+ # Expands glob expressions to a full list of paths.
+ # Examples: "*.fna" or "foo.fna,*.fna" or "foo.fna,/bar/*.fna"
+ def options_glob
+ @casts.each do |cast|
+ if cast[:type] == 'files' or cast[:type] == 'files!'
+ if @options.has_key? cast[:long]
+ files = []
+
+ @options[cast[:long]].each do |path|
+ if path.include? "*"
+ Dir.glob(path).each do |file|
+ files << file if File.file? file
+ end
+ else
+ files << path
+ end
+ end
+
+ @options[cast[:long]] = files
+ end
+ end
+ end
+ end
+
+ # Check all options according to casts.
+ def options_check
+ @casts.each do |cast|
+ options_check_mandatory(cast)
+ options_check_int(cast)
+ options_check_uint(cast)
+ options_check_file(cast)
+ options_check_files(cast)
+ options_check_dir(cast)
+ options_check_allowed(cast)
+ options_check_disallowed(cast)
+ end
+ end
+
+ # Check if a mandatory option is set and raise if it isn't.
+ def options_check_mandatory(cast)
+ if cast[:mandatory]
+ raise ArgumentError, "Mandatory argument: --#{cast[:long]}" unless @options.has_key? cast[:long]
+ end
+ end
+
+ # Check int type option and raise if not an integer.
+ def options_check_int(cast)
+ if cast[:type] == 'int' and @options.has_key? cast[:long]
+ unless @options[cast[:long]].is_a? Integer
+ raise ArgumentError, "Argument to --#{cast[:long]} must be an integer, not '#{@options[cast[:long]]}'"
+ end
+ end
+ end
+
+ # Check uint type option and raise if not an unsinged integer.
+ def options_check_uint(cast)
+ if cast[:type] == 'uint' and @options.has_key? cast[:long]
+ unless @options[cast[:long]].is_a? Integer and @options[cast[:long]] >= 0
+ raise ArgumentError, "Argument to --#{cast[:long]} must be an unsigned integer, not '#{@options[cast[:long]]}'"
+ end
+ end
+ end
+
+ # Check file! type argument and raise if file don't exists.
+ def options_check_file(cast)
+ if cast[:type] == 'file!' and @options.has_key? cast[:long]
+ raise ArgumentError, "No such file: '#{@options[cast[:long]]}'" unless File.file? @options[cast[:long]]
+ end
+ end
+
+ # Check files! type argument and raise if files don't exists.
+ def options_check_files(cast)
+ if cast[:type] == 'files!' and @options.has_key? cast[:long]
+ @options[cast[:long]].each do |path|
+ raise ArgumentError, "No such file: '#{path}'" unless File.file? path
+ end
+ end
+ end
+
+ # Check dir! type argument and raise if directory don't exist.
+ def options_check_dir(cast)
+ if cast[:type] == 'dir!' and @options.has_key? cast[:long]
+ raise ArgumentError, "No such directory: '#{@options[cast[:long]]}'" unless File.directory? @options[cast[:long]]
+ end
+ end
+
+ # Check options and raise unless allowed.
+ def options_check_allowed(cast)
+ if cast[:allowed] and @options.has_key? cast[:long]
+ allowed_hash = {}
+ cast[:allowed].split(',').each { |a| allowed_hash[a] = 1 }
+
+ raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} not allowed" unless allowed_hash.has_key? @options[cast[:long]]
+ end
+ end
+
+ # Check disallowed argument values and raise if disallowed.
+ def options_check_disallowed(cast)
+ if cast[:disallowed] and @options.has_key? cast[:long]
+ cast[:disallowed].split(',').each do |val|
+ raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} is disallowed" if val == @options[cast[:long]]
+ end
+ end
+ end
+end
+
+
+class Stream
+ def initialize(options)
+ @options = options
+ @in = stream_in_open
+ @out = stream_out_open
+ end
+
+ def each_record
+ record = {}
+
+ @in.each_line do |line|
+ case line
+ when /^([^:]+): (.*)$/
+ record[$1] = $2
+ when /^---$/
+ yield record unless record.empty?
+ record = {}
+ else
+ raise "Bad record format: #{line}"
+ end
+ end
+
+ yield record unless record.empty?
+
+ self # conventionally
+ end
+
+ alias :each :each_record
+
+ def puts(record)
+ record.each do |key,value|
+ @out.print "#{key}: #{value}\n"
+ end
+
+ @out.print "---\n"
+ end
+
+ private
+
+ # Open Biopieces input data stream for reading from either
+ # stdin or from a list of files specified in options["stream_in"].
+ def stream_in_open
+ if not $stdin.tty?
+ stream = $stdin
+ else
+ stream = read(@options["stream_in"])
+ end
+
+ stream
+ end
+
+ # Open Biopieces output data stream for writing to stdout
+ # or a file specified in options["stream_out"].
+ def stream_out_open
+ if @options["stream_out"]
+ stream = write(@options["stream_out"], @options["compress"])
+ else
+ stream = $stdout
+ end
+
+ stream
+ end
+
+ # Opens a reads stream to a list of files.
+ def read(files)
+ if zipped?(files)
+ stream = zread(files)
+ else
+ stream = nread(files)
+ end
+
+ stream
+ end
+
+ # Opens a write stream to a file and returns a _io_ object.
+ def write(file, zip=nil)
+ zip ? zwrite(file) : nwrite(file)
+ end
+
+ # Test if a list of files are gzipped or not.
+ # Raises if files are mixed zipped and unzipped.
+ def zipped?(files)
+ type_hash = {}
+
+ files.each do |file|
+ type = `file #{file}`
+
+ if type =~ /gzip compressed/
+ type_hash[:gzip] = true
+ else
+ type_hash[:ascii] = true
+ end
+ end
+
+ raise "Mixture of zipped and unzipped files" if type_hash.size == 2
+
+ type_hash[:gzip]
+ end
+
+ # Opens a list of gzipped files for reading and return an _io_ object.
+ def zread(files)
+ stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' '));
+ stdin.close
+ stderr.close
+ stdout
+ end
+
+ # Opens a file for gzipped writing and return an _io_ object.
+ def zwrite(file)
+ stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}")
+ stderr.close
+ stdout.close
+ stdin
+ end
+
+ # Opens a list of files for reading and return an _io_ object.
+ def nread(files)
+ stdin, stdout, stderr = Open3.popen3("cat " + files.join(' '));
+ stdin.close
+ stderr.close
+ stdout
+ end
+
+ # Opens a file for writing and return an _io_ object.
+ def nwrite(file)
+ File.open(file, mode="w")
+ end
+end
+
+
+class Status
+ def initialize
+ status_set
+ end
+
+ def status_set
+ now = Time.new
+ time = now.strftime("%Y-%m-%d %X")
+ user = ENV["USER"]
+ script = File.basename($0, ".rb")
+ pid = $$
+ path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
+
+ File.open(path, mode="w") { |file| file.puts [time, ARGV.join(" ")].join(";") }
+ end
+
+ def log(status="OK")
+ now = Time.new
+ time1 = now.strftime("%Y-%m-%d %X")
+ user = ENV["USER"]
+ script = File.basename($0, ".rb")
+ pid = $$
+ path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".")
+
+ stream = File.open(path)
+ time0, args, tmp_dir = stream.first.split(";")
+
+ # Dir.rmdir(tmp_dir) unless tmp_dir.nil? and File.directory? tmp_dir #TODO fix this!
+
+ elap = time_diff(time0, time1)
+ command = [script, args].join(" ")
+ log_file = ENV["BP_LOG"] + "/biopieces.log"
+
+ File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, status, command].join("\t") }
+ end
+
+ private
+
+ def time_diff(t0, t1)
+ t0 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/
+ year0 = $1.to_i
+ mon0 = $2.to_i
+ day0 = $3.to_i
+ hour0 = $4.to_i
+ min0 = $5.to_i
+ sec0 = $6.to_i
+
+ sec0 += day0 * 24 * 60 * 60
+ sec0 += hour0 * 60 * 60
+ sec0 += min0 * 60
+
+ t1 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/
+ year1 = $1.to_i
+ mon1 = $2.to_i
+ day1 = $3.to_i
+ hour1 = $4.to_i
+ min1 = $5.to_i
+ sec1 = $6.to_i
+
+ sec1 += day1 * 24 * 60 * 60
+ sec1 += hour1 * 60 * 60
+ sec1 += min1 * 60
+
+ year = year1 - year0
+ mon = mon1 - mon0
+ day = day1 - day0
+
+ sec = sec1 - sec0
+
+ hour = ( sec / ( 60 * 60 ) ).to_i
+ sec -= hour * 60 * 60
+
+ min = ( sec / 60 ).to_i
+ sec -= min * 60
+
+ [sprintf("%02d", hour), sprintf("%02d", min), sprintf("%02d", sec)].join(":")
+ end
+end
+
+
+__END__
--- /dev/null
+#!/usr/bin/env ruby
+# $:.unshift File.join(File.dirname(__FILE__),'..','lib')
+
+require 'test/unit'
+require 'mocha'
+require 'biopieces'
+require 'pp'
+
+TYPES = %w[flag string list int uint float file file! files files! dir dir! genome]
+SCRIPT_PATH = "write_fasta.rb"
+
+class OptionTest < Test::Unit::TestCase
+
+ # >>>>>>>>>>>>>>>>>>>> Testing Options.new <<<<<<<<<<<<<<<<<<<<
+
+ test "Options.new with no argument don't raise" do
+ assert_nothing_raised { Options.new }
+ end
+
+ test "Options.new with empty list don't raise" do
+ assert_nothing_raised { Options.new([]) }
+ end
+
+ test "Options.new with non-array argument raises" do
+ %w[ "foo", 1, 1.2, {}, nil, false, true ].each do |arg|
+ assert_raise(TypeError) { Options.new(arg) }
+ end
+ end
+
+ test "Options.new with missing cast key raises" do
+ assert_raise(CastError) { Options.new([{}]) }
+ end
+
+ test "Options.new with all cast keys don't raise" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+
+ test "Options.new with illegal long cast values raises" do
+ [nil, true, false, 1, 0, "a"].each do |long|
+ casts = [{:long=>long, :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_raise(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal long cast values don't raise" do
+ ["foo", "!!", "0123"].each do |long|
+ casts = [{:long=>long, :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with illegal short cast values raises" do
+ [nil, true, false, "foo"].each do |short|
+ casts = [{:long=>"foo", :short=>short, :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_raise(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal short cast values don't raise" do
+ ["!", "1", "a"].each do |short|
+ casts = [{:long=>"foo", :short=>short, :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with illegal type cast values raises" do
+ [nil, true, false, "foo", 12, 0].each do |type|
+ casts = [{:long=>"foo", :short=>"f", :type=>type, :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_raise(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal type cast values don't raise" do
+ TYPES.each do |type|
+ casts = [{:long=>"foo", :short=>"f", :type=>type, :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with illegal mandatory cast values raises" do
+ ["yes", 12, 0, nil].each do |mandatory|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>mandatory, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_raise(CastError) {Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal mandatory cast values don't raise" do
+ [true, false].each do |mandatory|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>mandatory, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ assert_nothing_raised(CastError) {Options.new(casts) }
+ end
+ end
+
+ test "Options.new with illegal default cast values raises" do
+ [true, false, [], {}].each do |default|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>default, :allowed=>nil, :disallowed=>nil}]
+ assert_raise(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal default cast values don't raise" do
+ ["foo", nil, 0, 0.0, 1, 1.2].each do |default|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>default, :allowed=>nil, :disallowed=>nil}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with illegal allowed cast values raises" do
+ [true, false, {}, [], 0, 0.1].each do |allowed|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>allowed, :disallowed=>nil}]
+ assert_raise(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal allowed cast values don't raise" do
+ ["foo,bar",nil].each do |allowed|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>allowed, :disallowed=>nil}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with illegal disallowed cast values raises" do
+ [true, false, {}, [], 0, 0.1].each do |disallowed|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>disallowed}]
+ assert_raise(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with legal disallowed cast values don't raise" do
+ ["foo,bar",nil].each do |disallowed|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>disallowed}]
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+ end
+
+ test "Options.new with duplicate long cast values raises" do
+ casts = []
+ casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ casts << {:long=>"foo", :short=>"b", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ assert_raise(CastError) { Options.new(casts) }
+ end
+
+ test "Options.new with duplicate short cast values raises" do
+ casts = []
+ casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ casts << {:long=>"bar", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ assert_raise(CastError) { Options.new(casts) }
+ end
+
+ test "Options.new without duplicate long and short cast values don't raise" do
+ casts = []
+ casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ casts << {:long=>"bar", :short=>"b", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+ assert_nothing_raised(CastError) { Options.new(casts) }
+ end
+
+ # >>>>>>>>>>>>>>>>>>>> Testing Options.parse <<<<<<<<<<<<<<<<<<<<
+
+ test "Options.parse with empty argv and missing wiki file raises" do
+ opt_parser = Options.new
+ assert_raise() { opt_parser.parse([], "foo") }
+ end
+
+ test "Options.parse with empty argv and existing wiki file don't raise" do
+ opt_parser = Options.new
+ opt_parser.expects(:print_usage_and_exit).with()
+ assert_nothing_raised { opt_parser.parse([], SCRIPT_PATH) }
+ end
+
+ test "Options.parse with --help in argv and existing wiki output long usage" do
+ opt_parser = Options.new
+ opt_parser.expects(:print_usage_and_exit).with(true)
+ assert_nothing_raised { opt_parser.parse(["--help"],SCRIPT_PATH) }
+ end
+
+# # FIXME This one fails because any argument to a flag is ignored and the flag value is set to true. Should it raise?
+# test "Options.parse with type cast flag with an argument raises" do
+# casts = [{:long=>"foo", :short=>"f", :type=>"flag", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+# opt_parser = Options.new(casts)
+# assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bar"],SCRIPT_PATH) }
+# end
+
+ test "Options.parse with --stream_in <arg> returns options['stream_in']=>[<arg>]" do
+ opt_parser = Options.new
+ options = opt_parser.parse(["--stream_in", __FILE__],SCRIPT_PATH)
+ assert_equal([__FILE__], options["stream_in"])
+ end
+
+ test "Options.parse with -I <arg> returns options['stream_in']=>[<arg>]" do
+ opt_parser = Options.new
+ options = opt_parser.parse(["-I", __FILE__],SCRIPT_PATH)
+ assert_equal([__FILE__], options["stream_in"])
+ end
+
+ test "Options.parse use cast default value if no argument given" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ options = opt_parser.parse(["-I", __FILE__],SCRIPT_PATH)
+ assert_equal(options["foo"], "bar")
+ end
+
+ test "Options.parse don't use default value if argument given" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ options = opt_parser.parse(["--foo", "bleh", "-I", __FILE__],SCRIPT_PATH)
+ assert_equal(options["foo"], "bleh")
+ end
+
+ test "Options.parse with mandatory cast and no argument raises" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_raise(ArgumentError) { opt_parser.parse(["-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ test "Options.parse with mandatory cast and argument don't raise" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "bar", "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+# # This one results in an error: "OptionParser::InvalidArgument: invalid argument: --foo bar"
+# # So it appears that this is tested in OptionParser already.
+# test "Options.parse with type cast int and non-int value raises" do
+# ["bar" ].each do |val| # what about nil, false, true, [], {}, 0.1 ?
+# casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+# opt_parser = Options.new(casts)
+# assert_raise(ArgumentError) { opt_parser.parse(["--foo", "#{val}"],SCRIPT_PATH) }
+# end
+# end
+
+ test "Options.parse with type cast int don't raise" do
+ [0,-1,1,327649123746293746374276347824].each do |val|
+ casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) }
+ end
+ end
+
+ # TODO: add a uint counterpart of the disabled test "Options.parse with type cast int and non-int value raises".
+
+ test "Options.parse with type cast uint don't raise" do
+ [0,1,327649123746293746374276347824].each do |val|
+ casts = [{:long=>"foo", :short=>"f", :type=>"uint", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) }
+ end
+ end
+
+ test "Options.parse with file! cast and file don't exists raises" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bleh", "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ test "Options.parse with file! cast and existing file don't raise" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", __FILE__, "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ test "Options.parse with files! cast and a file don't exists raises" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_raise(ArgumentError) { opt_parser.parse(["--foo", __FILE__ + ",bleh", "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ test "Options.parse with files! cast and files exists don't raise" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", __FILE__ + "," + __FILE__, "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ # TODO replace the absolute part below the file location with File.dirname(__FILE__)
+ test "Options.parse with glob argument expands correctly" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ argv = ["--foo", "/Users/maasha/unit_test/foo*,/Users/maasha/unit_test/my_dir/*.fna", "-I", __FILE__]
+ opt_parser = Options.new(casts)
+ options = opt_parser.parse(argv,SCRIPT_PATH)
+ assert_equal(options["foo"], ["/Users/maasha/unit_test/foo.fna", "/Users/maasha/unit_test/my_dir/bar.fna"])
+ end
+
+ test "Options.parse with dir! cast and dir don't exists raises" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bleh", "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ test "Options.parse with dir! cast and dir exists don't raise" do
+ casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "/", "-I", __FILE__],SCRIPT_PATH) }
+ end
+
+ test "Options.parse with allowed cast and not allowed value raises" do
+ ["bleh", "2", "3.3"].each do |val|
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_raise(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) }
+ end
+ end
+
+ test "Options.parse with allowed cast and allowed values don't raise" do
+ ["0", "-1", "0.0", "1", "bar"].each do |val|
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) }
+ end
+ end
+
+ test "Options.parse with disallowed cast and disallowed value raises" do
+ ["0", "-1", "0.0", "1", "bar"].each do |val|
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}]
+ opt_parser = Options.new(casts)
+ assert_raise(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) }
+ end
+ end
+
+ test "Options.parse with disallowed cast and allowed values don't raise" do
+ ["bleh", "2", "3.3"].each do |val|
+ casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}]
+ opt_parser = Options.new(casts)
+ assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) }
+ end
+ end
+end