From: martinahansen Date: Wed, 19 May 2010 07:01:33 +0000 (+0000) Subject: added read_454 and write_454 (and ruby stuff) X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=510346353f3669b1bd7595acf0efab26b11c1f71;hp=508e3a6973a59061775be6563bb75f5de6c3cb71;p=biopieces.git added read_454 and write_454 (and ruby stuff) git-svn-id: http://biopieces.googlecode.com/svn/trunk@961 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/read_454 b/bp_bin/read_454 new file mode 100755 index 0000000..b84ab12 --- /dev/null +++ b/bp_bin/read_454 @@ -0,0 +1,190 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Read 454 entries from fasta and quality file. 
+ +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Data::Dumper; +use Maasha::Biopieces; +use Maasha::Filesys; +use Maasha::Fasta; +use Maasha::Fastq; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $record, $data_in, $qual_in, $num, $fasta, $qual, @seqs, @scores, $i ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'data_in', short => 'i', type => 'file!', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'qual_in', short => 'q', type => 'file!', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, + { long => 'convert2dec', short => 'c', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'cutoff', short => 'C', type => 'int', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, + { long => 'soft_mask', short => 's', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) { + Maasha::Biopieces::put_record( $record, $out ); +} + +if ( $options->{ 'data_in' } ) +{ + $data_in = Maasha::Filesys::file_read_open( $options->{ 'data_in' } ); + $qual_in = Maasha::Filesys::file_read_open( $options->{ 'qual_in' } ); + + $num = 1; + + while ( $fasta = Maasha::Fasta::get_entry( $data_in ) ) + { + $qual = get_qual( $qual_in ); + + check_names( $fasta, $qual ); + + $record = { + SEQ_NAME => $fasta->[ 0 ], + SEQ => $fasta->[ 1 ], + SCORES => $qual->[ 1 ], + }; + + Maasha::Fastq::softmask_solexa_str( 
$record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'cutoff' } ) if $options->{ 'soft_mask' }; + $record->{ 'SCORES' } = Maasha::Fastq::solexa_str2dec_str( $record->{ 'SCORES' } ) if $options->{ 'convert2dec' }; + + Maasha::Biopieces::put_record( $record, $out ); + + last if $options->{ "num" } and $num == $options->{ "num" }; + + $num++; + } + + close $data_in; + close $qual_in; +} + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub get_qual +{ + # Martin A. Hansen, May 2010. + + # Get the next 454 quality entry from an open file. + + my ( $fh, # file handle + ) = @_; + + # Returns list. + + my ( $block, $name, $qual, $scores, $entry ); + + local $/ = "\n>"; + + while ( $block = <$fh> ) + { + chomp $block; + + last if $block !~ /^\s+$/; + } + + return if not defined $block; + + $block =~ /^>?(.+)\n/m; + $name = $1; + $qual = $'; + + local $/ = "\n"; + + chomp $qual; + + $name =~ tr/\r//d; + $qual =~ tr/ \n\r/;;;/; + $qual =~ s/;;/;/g; + + $scores = Maasha::Fastq::dec_str2solexa_str( $qual ); + + $entry = [ $name, $scores ]; + + return wantarray ? @{ $entry } : $entry; +} + + +sub check_names +{ + # Martin A. Hansen, May 2010. + + # Check if the names of the fasta and qual entries match + # and raise an error if they don't. + + my ( $fasta, # fasta entry + $qual, # qual entry + ) = @_; + + # Returns nothing. 
+ + my ( $f_name, $q_name ); + + if ( $fasta->[ 0 ] =~ /^([^ ]+)/ ) { + $f_name = $1; + } + + if ( $qual->[ 0 ] =~ /^([^ ]+)/ ) { + $q_name = $1; + } + + Maasha::Common::error( qq(names don't match "$fasta->[ 0 ]" ne "$qual->[ 0 ]") ) if $f_name ne $q_name; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_bin/write_454 b/bp_bin/write_454 new file mode 100755 index 0000000..0a0eba6 --- /dev/null +++ b/bp_bin/write_454 @@ -0,0 +1,117 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Write sequences and scores from stream to FASTA and QUAL files. 
+ +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Maasha::Biopieces; +use Maasha::Fastq; +use Maasha::Fasta; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $record, $data_out, $qual_out, $entry ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'data_out', short => 'o', type => 'file', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'qual_out', short => 'q', type => 'file', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'compress', short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } ); +$qual_out = Maasha::Biopieces::write_stream( $options->{ "qual_out" }, $options->{ "compress" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) + { + Maasha::Fasta::put_entry( $entry, $data_out ); + + qual_put_entry( $entry->[ 0 ], $record->{ 'SCORES' }, $qual_out ); + } + + Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; +} + +close $data_out; +close $qual_out; + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub qual_put_entry +{ + # Martin A. Hansen, May 2010. + + # Output a score string in 454 quality format. 
+ + my ( $name, # entry name + $score_str, # scores in Solexa format. + $fh, # output filehandle + ) = @_; + + # Returns nothing. + + $score_str = Maasha::Fastq::solexa_str2dec_str( $score_str ); + $score_str =~ tr/;/ /; + + print $fh ">$name\n"; + print $fh "$score_str\n"; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_conf/bashrc b/bp_conf/bashrc index 62e6d22..17d9c1a 100644 --- a/bp_conf/bashrc +++ b/bp_conf/bashrc @@ -30,7 +30,7 @@ export PERL5LIB="$PERL5LIB:$BP_PERL" ### Here we add the Biopieces Ruby libraries to RUBYLIB. -export RUBYLIB="$RUBYLIB:$BP_RUBY" +export RUBYLIB="$RUBYLIB:$BP_RUBY/Maasha/lib" ### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/code_perl/Maasha/Fastq.pm b/code_perl/Maasha/Fastq.pm index 80fea57..f65cee2 100644 --- a/code_perl/Maasha/Fastq.pm +++ b/code_perl/Maasha/Fastq.pm @@ -316,6 +316,24 @@ sub phred_str2dec_str } +sub dec_str2solexa_str +{ + # Martin A. Hansen, May 2010. + + # Converts a ; separated string of decimal scores to a + # string of Solexa octal scores. + + my ( $scores, # Decimal score string + ) = @_; + + # Returns a string. + + $scores =~ s/(\d{1,2});?/dec2solexa( $1 )/eg; + + return $scores; +} + + sub get_entry { # Martin A. Hansen, July 2009. diff --git a/code_ruby/Maasha/lib/biopieces.rb b/code_ruby/Maasha/lib/biopieces.rb new file mode 100644 index 0000000..f84f0b2 --- /dev/null +++ b/code_ruby/Maasha/lib/biopieces.rb @@ -0,0 +1,555 @@ +require 'optparse' +require 'open3' +require 'pp' + +# Error class for all exceptions to do with option casts. +class CastError < StandardError +end + +# Class using OptionParser to parse command line options according to a list of +# casts. 
Each cast prescribes the long and short name of the option, the type, +# if it is mandatory, the default value, and allowed and disallowed values. An +# optional list of extra casts can be supplied, and the integrity of the casts +# is checked. Following the command line parsing, the options are checked +# according to the casts. +class Biopieces + TYPES = %w[flag string list int uint float file file! files files! dir dir! genome] + MANDATORY = %w[long short type mandatory default allowed disallowed] + REGEX_LIST = /^(list|files|files!)$/ + REGEX_INT = /^(int|uint)$/ + REGEX_STRING = /^(string|file|file!|dir|dir!|genome)$/ + + # Check the integrity of a list of casts, followed by parsing options from argv + # and finally checking the options according to the casts. Returns nil if + # argv is empty, otherwise an options hash. + def parse(argv,casts=[],script_path=$0) + @casts = casts + @script_path = script_path + + cast_ubiquitous + cast_check + + @options = {} + + options_template = OptionParser.new do |option| + @casts.each do |cast| + if cast[:type] == 'flag' + option.on("-#{cast[:short]}", "--#{cast[:long]}") do |o| + @options[cast[:long]] = o + end + elsif cast[:type] =~ REGEX_LIST + option.on( "-#{cast[:short]}", "--#{cast[:long]} A", Array) do |a| + @options[cast[:long]] = a + end + elsif cast[:type] =~ REGEX_INT + option.on("-#{cast[:short]}", "--#{cast[:long]} I", Integer) do |i| + @options[cast[:long]] = i + end + elsif cast[:type] =~ REGEX_STRING + option.on("-#{cast[:short]}", "--#{cast[:long]} S", String) do |s| + @options[cast[:long]] = s + end + elsif cast[:type] == 'float' + option.on("-#{cast[:short]}", "--#{cast[:long]} F", Float) do |f| + @options[cast[:long]] = f + end + else + raise ArgumentError, "Unknown option type: '#{cast[:type]}'" + end + end + end + + options_template.parse!(argv) + + if print_usage_full? + print_usage_and_exit(true) + return # break for unit testing. + elsif print_usage_short? 
+ print_usage_and_exit + return # break for unit testing. + end + + options_default + options_glob + options_check + + @options + end + + private + + # Given the script name determine the path of the wiki file with the usage info. + def wiki_path + path = ENV["BP_DIR"] + "/bp_usage/" + File.basename(@script_path, ".rb") + ".wiki" + raise "No such wiki file: #{path}" unless File.file? path + + path + end + + # Add ubiquitous options casts. + def cast_ubiquitous + @casts << {:long => 'help', :short => '?', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil} + @casts << {:long => 'stream_in', :short => 'I', :type => 'files!', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil} + @casts << {:long => 'stream_out', :short => 'O', :type => 'file', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil} + @casts << {:long => 'verbose', :short => 'v', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil} + end + + # Check integrity of the casts. + def cast_check + cast_check_keys + cast_check_values + cast_check_duplicates + end + + # Check if all mandatory keys are present in casts and raise if not. + def cast_check_keys + @casts.each do |cast| + MANDATORY.each do |mandatory| + raise CastError, "Missing symbol in cast: '#{mandatory.to_sym}'" unless cast.has_key? mandatory.to_sym + end + end + end + + # Check if all values in casts are valid. + def cast_check_values + @casts.each do |cast| + cast_check_val_long(cast) + cast_check_val_short(cast) + cast_check_val_type(cast) + cast_check_val_mandatory(cast) + cast_check_val_default(cast) + cast_check_val_allowed(cast) + cast_check_val_disallowed(cast) + end + end + + # Check if the values to long are legal and raise if not. + def cast_check_val_long(cast) + unless cast[:long].is_a? 
String and cast[:long].length > 1 + raise CastError, "Illegal cast of long: '#{cast[:long]}'" + end + end + + # Check if the values to short are legal and raise if not. + def cast_check_val_short(cast) + unless cast[:short].is_a? String and cast[:short].length == 1 + raise CastError, "Illegal cast of short: '#{cast[:short]}'" + end + end + + # Check if values to type are legal and raise if not. + def cast_check_val_type(cast) + type_hash = {} + TYPES.each do |type| + type_hash[type] = true + end + + unless type_hash.has_key? cast[:type] + raise CastError, "Illegal cast of type: '#{cast[:type]}'" + end + end + + # Check if values to mandatory are legal and raise if not. + def cast_check_val_mandatory(cast) + unless cast[:mandatory] == true or cast[:mandatory] == false + raise CastError, "Illegal cast of mandatory: '#{cast[:mandatory]}'" + end + end + + # Check if values to default are legal and raise if not. + def cast_check_val_default(cast) + unless cast[:default].nil? or + cast[:default].is_a? String or + cast[:default].is_a? Integer or + cast[:default].is_a? Float + raise CastError, "Illegal cast of default: '#{cast[:default]}'" + end + end + + # Check if values to allowed are legal and raise if not. + def cast_check_val_allowed(cast) + unless cast[:allowed].is_a? String or cast[:allowed].nil? + raise CastError, "Illegal cast of allowed: '#{cast[:allowed]}'" + end + end + + # Check if values to disallowed are legal and raise if not. + def cast_check_val_disallowed(cast) + unless cast[:disallowed].is_a? String or cast[:disallowed].nil? + raise CastError, "Illegal cast of disallowed: '#{cast[:disallowed]}'" + end + end + + # Check cast for duplicate long or short options names. + def cast_check_duplicates + check_hash = {} + @casts.each do |cast| + raise CastError, "Duplicate argument: '--#{cast[:long]}'" if check_hash.has_key? cast[:long] + raise CastError, "Duplicate argument: '-#{cast[:short]}'" if check_hash.has_key? 
cast[:short] + check_hash[cast[:long]] = true + check_hash[cast[:short]] = true + end + end + + # Check if full "usage info" should be printed. + def print_usage_full? + @options["help"] + end + + # Check if short "usage info" should be printed. + def print_usage_short? + if not $stdin.tty? + return false + elsif @options["stream_in"] + return false + elsif @options["data_in"] + return false + elsif wiki_path =~ /^(list_biopieces|list_genomes|list_mysql_databases|biostat)$/ # TODO get rid of this! + return false + else + return true + end + end + + # Print usage info by Calling an external script 'print_wiki' + # using a system() call and exit. An optional 'full' flag + # outputs the full usage info. + def print_usage_and_exit(full=nil) + if full + system("print_wiki --data_in #{wiki_path} --help") + else + system("print_wiki --data_in #{wiki_path}") + end + + raise "Failed printing wiki: #{wiki_path}" unless $?.success? + + exit + end + + # Set default options value from cast unless a value is set. + def options_default + @casts.each do |cast| + if cast[:default] + @options[cast[:long]] = cast[:default] unless @options.has_key? cast[:long] + end + end + end + + # Expands glob expressions to a full list of paths. + # Examples: "*.fna" or "foo.fna,*.fna" or "foo.fna,/bar/*.fna" + def options_glob + @casts.each do |cast| + if cast[:type] == 'files' or cast[:type] == 'files!' + if @options.has_key? cast[:long] + files = [] + + @options[cast[:long]].each do |path| + if path.include? "*" + Dir.glob(path).each do |file| + files << file if File.file? file + end + else + files << path + end + end + + @options[cast[:long]] = files + end + end + end + end + + # Check all options according to casts. 
+ def options_check + @casts.each do |cast| + options_check_mandatory(cast) + options_check_int(cast) + options_check_uint(cast) + options_check_file(cast) + options_check_files(cast) + options_check_dir(cast) + options_check_allowed(cast) + options_check_disallowed(cast) + end + end + + # Check if a mandatory option is set and raise if it isn't. + def options_check_mandatory(cast) + if cast[:mandatory] + raise ArgumentError, "Mandatory argument: --#{cast[:long]}" unless @options.has_key? cast[:long] + end + end + + # Check int type option and raise if not an integer. + def options_check_int(cast) + if cast[:type] == 'int' and @options.has_key? cast[:long] + unless @options[cast[:long]].is_a? Integer + raise ArgumentError, "Argument to --#{cast[:long]} must be an integer, not '#{@options[cast[:long]]}'" + end + end + end + + # Check uint type option and raise if not an unsigned integer. + def options_check_uint(cast) + if cast[:type] == 'uint' and @options.has_key? cast[:long] + unless @options[cast[:long]].is_a? Integer and @options[cast[:long]] >= 0 + raise ArgumentError, "Argument to --#{cast[:long]} must be an unsigned integer, not '#{@options[cast[:long]]}'" + end + end + end + + # Check file! type argument and raise if the file doesn't exist. + def options_check_file(cast) + if cast[:type] == 'file!' and @options.has_key? cast[:long] + raise ArgumentError, "No such file: '#{@options[cast[:long]]}'" unless File.file? @options[cast[:long]] + end + end + + # Check files! type argument and raise if any of the files doesn't exist. + def options_check_files(cast) + if cast[:type] == 'files!' and @options.has_key? cast[:long] + @options[cast[:long]].each do |path| + raise ArgumentError, "No such file: '#{path}'" unless File.file? path + end + end + end + + # Check dir! type argument and raise if the directory doesn't exist. + def options_check_dir(cast) + if cast[:type] == 'dir!' and @options.has_key? 
cast[:long] + raise ArgumentError, "No such directory: '#{@options[cast[:long]]}'" unless File.directory? @options[cast[:long]] + end + end + + # Check options and raise unless allowed. + def options_check_allowed(cast) + if cast[:allowed] and @options.has_key? cast[:long] + allowed_hash = {} + cast[:allowed].split(',').each { |a| allowed_hash[a] = 1 } + + raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} not allowed" unless allowed_hash.has_key? @options[cast[:long]] + end + end + + # Check disallowed argument values and raise if disallowed. + def options_check_disallowed(cast) + if cast[:disallowed] and @options.has_key? cast[:long] + cast[:disallowed].split(',').each do |val| + raise ArgumentError, "Argument '#{@options[cast[:long]]}' to --#{cast[:long]} is disallowed" if val == @options[cast[:long]] + end + end + end +end + + +class Stream + def initialize(options) + @options = options + @in = stream_in_open + @out = stream_out_open + end + + def each_record + record = {} + + @in.each_line do |line| + case line + when /^([^:]+): (.*)$/ + record[$1] = $2 + when /^---$/ + yield record unless record.empty? + record = {} + else + raise "Bad record format: #{line}" + end + end + + yield record unless record.empty? + + self # conventionally + end + + alias :each :each_record + + def puts(record) + record.each do |key,value| + @out.print "#{key}: #{value}\n" + end + + @out.print "---\n" + end + + private + + # Open Biopieces input data stream for reading from either + # stdin or from a list of files specified in options["stream_in"]. + def stream_in_open + if not $stdin.tty? + stream = $stdin + else + stream = read(@options["stream_in"]) + end + + stream + end + + # Open Biopieces output data stream for writing to stdout + # or a file specified in options["stream_out"]. 
+ def stream_out_open + if @options["stream_out"] + stream = write(@options["stream_out"], @options["compress"]) + else + stream = $stdout + end + + stream + end + + # Opens a read stream for a list of files. + def read(files) + if zipped?(files) + stream = zread(files) + else + stream = nread(files) + end + + stream + end + + # Opens a write stream to a file and returns an _io_ object. + def write(file, zip=nil) + zip ? zwrite(file) : nwrite(file) + end + + # Tests whether the files in a list are gzipped or not. + # Raises if files are mixed zipped and unzipped. + def zipped?(files) + type_hash = {} + + files.each do |file| + type = `file #{file}` + + if type =~ /gzip compressed/ + type_hash[:gzip] = true + else + type_hash[:ascii] = true + end + end + + raise "Mixture of zipped and unzipped files" if type_hash.size == 2 + + type_hash[:gzip] + end + + # Opens a list of gzipped files for reading and returns an _io_ object. + def zread(files) + stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' ')); + stdin.close + stderr.close + stdout + end + + # Opens a file for gzipped writing and returns an _io_ object. + def zwrite(file) + stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}") + stderr.close + stdout.close + stdin + end + + # Opens a list of files for reading and returns an _io_ object. + def nread(files) + stdin, stdout, stderr = Open3.popen3("cat " + files.join(' ')); + stdin.close + stderr.close + stdout + end + + # Opens a file for writing and returns an _io_ object. 
+ def nwrite(file) + File.open(file, mode="w") + end +end + + +class Status + def initialize + status_set + end + + def status_set + now = Time.new + time = now.strftime("%Y-%m-%d %X") + user = ENV["USER"] + script = File.basename($0, ".rb") + pid = $$ + path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") + + File.open(path, mode="w") { |file| file.puts [time, ARGV.join(" ")].join(";") } + end + + def log(status="OK") + now = Time.new + time1 = now.strftime("%Y-%m-%d %X") + user = ENV["USER"] + script = File.basename($0, ".rb") + pid = $$ + path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") + + stream = File.open(path) + time0, args, tmp_dir = stream.first.split(";") + + # Dir.rmdir(tmp_dir) unless tmp_dir.nil? and File.directory? tmp_dir #TODO fix this! + + elap = time_diff(time0, time1) + command = [script, args].join(" ") + log_file = ENV["BP_LOG"] + "/biopieces.log" + + File.open(log_file, mode="a") { |file| file.puts [time0, time1, elap, user, status, command].join("\t") } + end + + private + + def time_diff(t0, t1) + t0 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/ + year0 = $1.to_i + mon0 = $2.to_i + day0 = $3.to_i + hour0 = $4.to_i + min0 = $5.to_i + sec0 = $6.to_i + + sec0 += day0 * 24 * 60 * 60 + sec0 += hour0 * 60 * 60 + sec0 += min0 * 60 + + t1 =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/ + year1 = $1.to_i + mon1 = $2.to_i + day1 = $3.to_i + hour1 = $4.to_i + min1 = $5.to_i + sec1 = $6.to_i + + sec1 += day1 * 24 * 60 * 60 + sec1 += hour1 * 60 * 60 + sec1 += min1 * 60 + + year = year1 - year0 + mon = mon1 - mon0 + day = day1 - day0 + + sec = sec1 - sec0 + + hour = ( sec / ( 60 * 60 ) ).to_i + sec -= hour * 60 * 60 + + min = ( sec / 60 ).to_i + sec -= min * 60 + + [sprintf("%02d", hour), sprintf("%02d", min), sprintf("%02d", sec)].join(":") + end +end + + +__END__ diff --git a/code_ruby/Maasha/test/test_biopieces.rb b/code_ruby/Maasha/test/test_biopieces.rb new file mode 100755 index 0000000..14a83c8 --- /dev/null +++ 
b/code_ruby/Maasha/test/test_biopieces.rb @@ -0,0 +1,326 @@ +#!/usr/bin/env ruby +# $:.unshift File.join(File.dirname(__FILE__),'..','lib') + +require 'test/unit' +require 'mocha' +require 'biopieces' +require 'pp' + +TYPES = %w[flag string list int uint float file file! files files! dir dir! genome] +SCRIPT_PATH = "write_fasta.rb" + +class OptionTest < Test::Unit::TestCase + + # >>>>>>>>>>>>>>>>>>>> Testing Options.new <<<<<<<<<<<<<<<<<<<< + + test "Options.new with no argument don't raise" do + assert_nothing_raised { Options.new } + end + + test "Options.new with empty list don't raise" do + assert_nothing_raised { Options.new([]) } + end + + test "Options.new with non-array argument raises" do + %w[ "foo", 1, 1.2, {}, nil, false, true ].each do |arg| + assert_raise(TypeError) { Options.new(arg) } + end + end + + test "Options.new with missing cast key raises" do + assert_raise(CastError) { Options.new([{}]) } + end + + test "Options.new with all cast keys don't raise" do + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + + test "Options.new with illegal long cast values raises" do + [nil, true, false, 1, 0, "a"].each do |long| + casts = [{:long=>long, :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(CastError) { Options.new(casts) } + end + end + + test "Options.new with legal long cast values don't raise" do + ["foo", "!!", "0123"].each do |long| + casts = [{:long=>long, :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + end + + test "Options.new with illegal short cast values raises" do + [nil, true, false, "foo"].each do |short| + casts = [{:long=>"foo", :short=>short, :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + 
assert_raise(CastError) { Options.new(casts) } + end + end + + test "Options.new with legal short cast values don't raise" do + ["!", "1", "a"].each do |short| + casts = [{:long=>"foo", :short=>short, :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + end + + test "Options.new with illegal type cast values raises" do + [nil, true, false, "foo", 12, 0].each do |type| + casts = [{:long=>"foo", :short=>"f", :type=>type, :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(CastError) { Options.new(casts) } + end + end + + test "Options.new with legal type cast values don't raise" do + TYPES.each do |type| + casts = [{:long=>"foo", :short=>"f", :type=>type, :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + end + + test "Options.new with illegal mandatory cast values raises" do + ["yes", 12, 0, nil].each do |mandatory| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>mandatory, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(CastError) {Options.new(casts) } + end + end + + test "Options.new with legal mandatory cast values don't raise" do + [true, false].each do |mandatory| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>mandatory, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(CastError) {Options.new(casts) } + end + end + + test "Options.new with illegal default cast values raises" do + [true, false, [], {}].each do |default| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>default, :allowed=>nil, :disallowed=>nil}] + assert_raise(CastError) { Options.new(casts) } + end + end + + test "Options.new with legal default cast values don't raise" do + ["foo", nil, 0, 0.0, 1, 1.2].each do |default| + casts = [{:long=>"foo", :short=>"f", :type=>"int", 
:mandatory=>false, :default=>default, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + end + + test "Options.new with illegal allowed cast values raises" do + [true, false, {}, [], 0, 0.1].each do |allowed| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>allowed, :disallowed=>nil}] + assert_raise(CastError) { Options.new(casts) } + end + end + + test "Options.new with legal allowed cast values don't raise" do + ["foo,bar",nil].each do |allowed| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>allowed, :disallowed=>nil}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + end + + test "Options.new with illegal disallowed cast values raises" do + [true, false, {}, [], 0, 0.1].each do |disallowed| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>disallowed}] + assert_raise(CastError) { Options.new(casts) } + end + end + + test "Options.new with legal disallowed cast values don't raise" do + ["foo,bar",nil].each do |disallowed| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>disallowed}] + assert_nothing_raised(CastError) { Options.new(casts) } + end + end + + test "Options.new with duplicate long cast values raises" do + casts = [] + casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + casts << {:long=>"foo", :short=>"b", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + assert_raise(CastError) { Options.new(casts) } + end + + test "Options.new with duplicate short cast values raises" do + casts = [] + casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + casts << {:long=>"bar", :short=>"f", :type=>"int", 
:mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + assert_raise(CastError) { Options.new(casts) } + end + + test "Options.new without duplicate long and short cast values don't raise" do + casts = [] + casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + casts << {:long=>"bar", :short=>"b", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + assert_nothing_raised(CastError) { Options.new(casts) } + end + + # >>>>>>>>>>>>>>>>>>>> Testing Options.parse <<<<<<<<<<<<<<<<<<<< + + test "Options.parse with empty argv and missing wiki file raises" do + opt_parser = Options.new + assert_raise() { opt_parser.parse([], "foo") } + end + + test "Options.parse with empty argv and existing wiki file don't raise" do + opt_parser = Options.new + opt_parser.expects(:print_usage_and_exit).with() + assert_nothing_raised { opt_parser.parse([], SCRIPT_PATH) } + end + + test "Options.parse with --help in argv and existing wiki output long usage" do + opt_parser = Options.new + opt_parser.expects(:print_usage_and_exit).with(true) + assert_nothing_raised { opt_parser.parse(["--help"],SCRIPT_PATH) } + end + +# # FIXME This one fails because any argument to a flag is ignored and the flag value is set to true. Should it raise? 
+# test "Options.parse with type cast flag with an argument raises" do +# casts = [{:long=>"foo", :short=>"f", :type=>"flag", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] +# opt_parser = Options.new(casts) +# assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bar"],SCRIPT_PATH) } +# end + + test "Options.parse with --stream_in returns options['stream_in']=>[]" do + opt_parser = Options.new + options = opt_parser.parse(["--stream_in", __FILE__],SCRIPT_PATH) + assert_equal([__FILE__], options["stream_in"]) + end + + test "Options.parse with -I returns options['stream_in']=>[]" do + opt_parser = Options.new + options = opt_parser.parse(["-I", __FILE__],SCRIPT_PATH) + assert_equal([__FILE__], options["stream_in"]) + end + + test "Options.parse use cast default value if no argument given" do + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + options = opt_parser.parse(["-I", __FILE__],SCRIPT_PATH) + assert_equal(options["foo"], "bar") + end + + test "Options.parse don't use default value if argument given" do + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + options = opt_parser.parse(["--foo", "bleh", "-I", __FILE__],SCRIPT_PATH) + assert_equal(options["foo"], "bleh") + end + + test "Options.parse with mandatory cast and no argument raises" do + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_raise(ArgumentError) { opt_parser.parse(["-I", __FILE__],SCRIPT_PATH) } + end + + test "Options.parse with mandatory cast and argument don't raise" do + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + 
assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "bar", "-I", __FILE__],SCRIPT_PATH) } + end + +# # This one results in an error: "OptionParser::InvalidArgument: invalid argument: --foo bar" +# # So it appears that this is tested in OptionParser already. +# test "Options.parse with type cast int and non-int value raises" do +# ["bar" ].each do |val| # what about nil, false, true, [], {}, 0.1 ? +# casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] +# opt_parser = Options.new(casts) +# assert_raise(ArgumentError) { opt_parser.parse(["--foo", "#{val}"],SCRIPT_PATH) } +# end +# end + + test "Options.parse with type cast int don't raise" do + [0,-1,1,327649123746293746374276347824].each do |val| + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) } + end + end + + # TODO similar test for uint as "test "Options.parse with type cast int and non-int value raises" do" + + test "Options.parse with type cast uint don't raise" do + [0,1,327649123746293746374276347824].each do |val| + casts = [{:long=>"foo", :short=>"f", :type=>"uint", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) } + end + end + + test "Options.parse with file! cast and file don't exists raises" do + casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bleh", "-I", __FILE__],SCRIPT_PATH) } + end + + test "Options.parse with file! 
cast and existing file don't raise" do + casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", __FILE__, "-I", __FILE__],SCRIPT_PATH) } + end + + test "Options.parse with files! cast and a file don't exists raises" do + casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_raise(ArgumentError) { opt_parser.parse(["--foo", __FILE__ + ",bleh", "-I", __FILE__],SCRIPT_PATH) } + end + + test "Options.parse with files! cast and files exists don't raise" do + casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", __FILE__ + "," + __FILE__, "-I", __FILE__],SCRIPT_PATH) } + end + + # TODO replace the absolute part below the file location with File.dirname(__FILE__) + test "Options.parse with glob argument expands correctly" do + casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + argv = ["--foo", "/Users/maasha/unit_test/foo*,/Users/maasha/unit_test/my_dir/*.fna", "-I", __FILE__] + opt_parser = Options.new(casts) + options = opt_parser.parse(argv,SCRIPT_PATH) + assert_equal(options["foo"], ["/Users/maasha/unit_test/foo.fna", "/Users/maasha/unit_test/my_dir/bar.fna"]) + end + + test "Options.parse with dir! cast and dir don't exists raises" do + casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bleh", "-I", __FILE__],SCRIPT_PATH) } + end + + test "Options.parse with dir! 
cast and dir exists don't raise" do + casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "/", "-I", __FILE__],SCRIPT_PATH) } + end + + test "Options.parse with allowed cast and not allowed value raises" do + ["bleh", "2", "3.3"].each do |val| + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_raise(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) } + end + end + + test "Options.parse with allowed cast and allowed values don't raise" do + ["0", "-1", "0.0", "1", "bar"].each do |val| + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) } + end + end + + test "Options.parse with disallowed cast and disallowed value raises" do + ["0", "-1", "0.0", "1", "bar"].each do |val| + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}] + opt_parser = Options.new(casts) + assert_raise(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) } + end + end + + test "Options.parse with disallowed cast and allowed values don't raise" do + ["bleh", "2", "3.3"].each do |val| + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}] + opt_parser = Options.new(casts) + assert_nothing_raised(ArgumentError) { opt_parser.parse(["--foo", "#{val}", "-I", __FILE__],SCRIPT_PATH) } + end + end +end