From c4f14c511655d92281b6d70363de57b77a9b6045 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 23 May 2011 10:10:30 +0000 Subject: [PATCH] committing major ruby overhaul git-svn-id: http://biopieces.googlecode.com/svn/trunk@1413 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/analyze_assembly | 37 +-- bp_bin/analyze_seq | 28 +- bp_bin/bin_vals | 14 +- bp_bin/calc_N50 | 29 +- bp_bin/clip_adaptor | 18 +- bp_bin/clip_seq | 34 +-- bp_bin/digest_seq | 44 +-- bp_bin/find_adaptor | 37 +-- bp_bin/find_genes | 40 +-- bp_bin/find_homopolymers | 17 +- bp_bin/find_mids | 38 +-- bp_bin/join_seq | 29 +- bp_bin/kmer_freq | 28 +- bp_bin/length_seq | 12 +- bp_bin/mask_seq | 10 +- bp_bin/pcr_seq | 57 ++-- bp_bin/plot_scores | 22 +- bp_bin/progress_meter | 15 +- bp_bin/read_fasta | 38 +-- bp_bin/read_fastq | 42 +-- bp_bin/read_genbank | 39 +-- bp_bin/read_sff | 46 +-- bp_bin/remove_mids | 31 +- bp_bin/scores_to_dec | 12 +- bp_bin/shred_seq | 19 +- bp_bin/shuffle_records | 18 +- bp_bin/swapcase_seq | 18 +- bp_bin/uclust_seq | 44 +-- bp_test/out/analyze_assembly.out.1 | 7 - bp_test/out/calc_N50.out.1 | 2 - code_ruby/lib/maasha/biopieces.rb | 249 ++++++++-------- code_ruby/test/maasha/test_biopieces.rb | 361 ++++++++++++------------ 32 files changed, 702 insertions(+), 733 deletions(-) diff --git a/bp_bin/analyze_assembly b/bp_bin/analyze_assembly index bcc727c..7fe7bd6 100755 --- a/bp_bin/analyze_assembly +++ b/bp_bin/analyze_assembly @@ -40,27 +40,27 @@ casts << {:long=>'procedure', :short=>'p', :type=>'string', :mandatory=>true, : casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) options[:full] = true; total = 0 lengths = [] -tmpdir = bp.mktmpdir -infile = "#{tmpdir}/in.fna" -outfile = "#{tmpdir}/out.prodigal" - -Fasta.open(infile, mode="w") do |fasta_io| - bp.each_record do |record| - if record.has_key? :SEQ - total += record[:SEQ].length - lengths << record[:SEQ].length +tmpdir = Biopieces.mktmpdir +infile = File.join(tmpdir, "in.fna") +outfile = File.join(tmpdir, "out.prodigal") + +Fasta.open(infile, mode="w") do |fasta_output| + Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + total += record[:SEQ].length + lengths << record[:SEQ].length + end + + output.puts record unless options[:no_stream] + fasta_output.puts record end - - bp.puts record unless options[:no_stream] - fasta_io.puts record end end @@ -82,8 +82,6 @@ lengths.sort.reverse.each do |length| end end -bp.out = Stream.write(options[:data_out]) if options[:data_out] - new_record = {} new_record[:N50] = n50 new_record[:MAX] = lengths.max @@ -93,7 +91,10 @@ new_record[:TOTAL] = total new_record[:COUNT] = lengths.size new_record[:GENE_COV] = gene_cov if options[:gene_cov] -bp.puts new_record +Biopieces.open(nil, options[:data_out]) do |input, output| + output.puts new_record +end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/analyze_seq b/bp_bin/analyze_seq index 303d5e4..f54b905 100755 --- a/bp_bin/analyze_seq +++ b/bp_bin/analyze_seq @@ -31,33 +31,31 @@ require 'maasha/biopieces' require 'maasha/seq' -require 'pp' casts = [] -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + seq = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SEQ_TYPE], record[:SCORE]) + comp = seq.composition -bp.each_record do |record| - if record.has_key? :SEQ - seq = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SEQ_TYPE], record[:SCORE]) - comp = seq.composition + comp.each_pair do |key,val| + record["RES[#{key}]"] = val + end - comp.each_pair do |key,val| - record["RES[#{key}]"] = val + record["SOFT_MASK%"] = seq.soft_mask + record["HARD_MASK%"] = (comp["N"].to_f / (seq.len - seq.indels).to_f * 100.0).round(2) + record["GC%"] = ((comp["G"] + comp["C"]).to_f / (seq.len - seq.indels).to_f * 100.0).round(2) end - record["SOFT_MASK%"] = seq.soft_mask - record["HARD_MASK%"] = (comp["N"].to_f / (seq.len - seq.indels).to_f * 100.0).round(2) - record["GC%"] = ((comp["G"] + comp["C"]).to_f / (seq.len - seq.indels).to_f * 100.0).round(2) + output.puts record end - - bp.puts record end - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/bin_vals b/bp_bin/bin_vals index 5f1fd05..db9e3b4 100755 --- a/bp_bin/bin_vals +++ b/bp_bin/bin_vals @@ -35,16 +35,16 @@ casts = [] casts << {:long=>'key', :short=>'k', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'bin_size', :short=>'b', :type=>'uint', :mandatory=>true, :default=>10, :allowed=>nil, :disallowed=>'0'} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? options[:key] + record[(options[:key].to_s + "_BIN").to_sym] = (record[options[:key]].to_i / options[:bin_size]) * options[:bin_size] + end -bp.each_record do |record| - if record.has_key? options[:key] - record[(options[:key].to_s + "_BIN").to_sym] = (record[options[:key]].to_i / options[:bin_size]) * options[:bin_size] + output.puts record end - - bp.puts record end diff --git a/bp_bin/calc_N50 b/bp_bin/calc_N50 index 38de308..3e56af9 100755 --- a/bp_bin/calc_N50 +++ b/bp_bin/calc_N50 @@ -30,41 +30,44 @@ require 'maasha/biopieces' -require 'pp' casts = [] casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) total = 0 lengths = [] -bp.each_record do |record| - bp.puts record unless options[:no_stream] +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record unless options[:no_stream] - if record.has_key? :SEQ - total += record[:SEQ].length - lengths << record[:SEQ].length + if record.has_key? :SEQ + total += record[:SEQ].length + lengths << record[:SEQ].length + end end end -bp.out = Stream.write(options[:data_out]) if options[:data_out] - -count = 0 +new_record = {} +count = 0 lengths.sort.reverse.each do |length| count += length if count >= total * 0.50 - bp.puts "N50" => length + new_record["N50"] = length break end end +Biopieces.open(nil, options[:data_out]) do |input, output| + output.puts new_record +end + + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/clip_adaptor b/bp_bin/clip_adaptor index 2056f2d..5cb2fa5 100755 --- a/bp_bin/clip_adaptor +++ b/bp_bin/clip_adaptor @@ -33,18 +33,18 @@ require 'maasha/biopieces' casts = [] -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ and record.has_key? :ADAPTOR_POS + record[:SEQ] = record[:SEQ][0 ... record[:ADAPTOR_POS].to_i] + record[:SCORES] = record[:SCORES][0 ... record[:ADAPTOR_POS].to_i] if record[:SCORES] + record[:SEQ_LEN] = record[:SEQ].length + end -bp.each_record do |record| - if record.has_key? :SEQ and record.has_key? :ADAPTOR_POS - record[:SEQ] = record[:SEQ][0 ... record[:ADAPTOR_POS].to_i] - record[:SCORES] = record[:SCORES][0 ... record[:ADAPTOR_POS].to_i] if record[:SCORES] - record[:SEQ_LEN] = record[:SEQ].length + output.puts record end - - bp.puts record end diff --git a/bp_bin/clip_seq b/bp_bin/clip_seq index f26e10b..45a96c7 100755 --- a/bp_bin/clip_seq +++ b/bp_bin/clip_seq @@ -33,31 +33,31 @@ require 'maasha/biopieces' casts = [] -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + trim_beg = 0 + trim_end = record[:SEQ].length -bp.each_record do |record| - if record.has_key? :SEQ - trim_beg = 0 - trim_end = record[:SEQ].length + record[:SEQ] =~ /[^a-z]/ - record[:SEQ] =~ /[^a-z]/ + trim_beg = $`.length - trim_beg = $`.length + if record[:SEQ] =~ /[a-z]+$/ + trim_end = $`.length + else + trim_end = record[:SEQ].length + end - if record[:SEQ] =~ /[a-z]+$/ - trim_end = $`.length - else - trim_end = record[:SEQ].length + record[:SEQ] = record[:SEQ][trim_beg ... trim_end] + record[:SEQ_LEN] = record[:SEQ].length + record[:SCORES] = record[:SCORES][trim_beg ... trim_end] if record.has_key? :SCORES end - record[:SEQ] = record[:SEQ][trim_beg ... trim_end] - record[:SEQ_LEN] = record[:SEQ].length - record[:SCORES] = record[:SCORES][trim_beg ... trim_end] if record.has_key? :SCORES + output.puts record end - - bp.puts record end diff --git a/bp_bin/digest_seq b/bp_bin/digest_seq index 03e8ec9..7399ae0 100755 --- a/bp_bin/digest_seq +++ b/bp_bin/digest_seq @@ -37,30 +37,30 @@ casts = [] casts << {:long=>'pattern', :short=>'p', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'cut_pos', :short=>'c', :type=>'int', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - if record.has_key? :SEQ_NAME and record.has_key? :SEQ - seq = Seq.new(record[:SEQ_NAME], record[:SEQ]) - digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos]) - - digest.each do |subseq| - new_record = subseq.to_bp - - if new_record[:SEQ_NAME] =~ /\[(\d+)-(\d+)\]$/ - s_beg = $1 - s_end = $2 +options = Biopieces.options_parse(ARGV, casts) + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ_NAME and record.has_key? :SEQ + seq = Seq.new(record[:SEQ_NAME], record[:SEQ]) + digest = Digest.new(seq, options[:pattern].to_s, options[:cut_pos]) + + digest.each do |subseq| + new_record = subseq.to_bp + + if new_record[:SEQ_NAME] =~ /\[(\d+)-(\d+)\]$/ + s_beg = $1 + s_end = $2 + end + + new_record[:S_BEG] = s_beg + new_record[:S_END] = s_end + new_record[:REC_TYPE] = "DIGEST" + output.puts new_record end - - new_record[:S_BEG] = s_beg - new_record[:S_END] = s_end - new_record[:REC_TYPE] = "DIGEST" - bp.puts new_record + else + output.puts record end - else - bp.puts record end end diff --git a/bp_bin/find_adaptor b/bp_bin/find_adaptor index aa1a014..6319874 100755 --- a/bp_bin/find_adaptor +++ b/bp_bin/find_adaptor @@ -133,9 +133,7 @@ casts << {:long=>'pos', :short=>'p', :type=>'int', :mandatory=>fals casts << {:long=>'dist', :short=>'d', :type=>'uint', :mandatory=>false, :default=>0, :allowed=>nil, :disallowed=>nil} casts << {:long=>'cache', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) adaptor = options[:adaptor].to_s.upcase adaptor_disamb = disambiguate(adaptor) @@ -145,28 +143,31 @@ pos -= 1 if pos > 0 # pos was 1-based cache = {} -bp.each_record do |record| - if record.has_key? :SEQ - entry = Seq.new(record[:SEQ_NAME], record[:SEQ], "dna", record[:SCORES]) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + entry = Seq.new(record[:SEQ_NAME], record[:SEQ], "dna", record[:SCORES]) - if cache[entry.seq.upcase.to_sym] and options[:cache] - match = cache[entry.seq.upcase] - else - match = entry.adaptor_find(adaptor, adaptor_disamb, pos, options[:edit_distance], options[:dist]) + if cache[entry.seq.upcase.to_sym] and options[:cache] + match = cache[entry.seq.upcase] + else + match = entry.adaptor_find(adaptor, adaptor_disamb, pos, options[:edit_distance], options[:dist]) - cache[entry.seq.upcase.to_sym] = match if match and options[:cache] - end + cache[entry.seq.upcase.to_sym] = match if match and options[:cache] + end - if match - record[:ADAPTOR_POS] = match.pos - record[:ADAPTOR_LEN] = match.length - record[:ADAPTOR_MATCH] = match.match + if match + record[:ADAPTOR_POS] = match.pos + record[:ADAPTOR_LEN] = match.length + record[:ADAPTOR_MATCH] = match.match + end end - end - bp.puts record + output.puts record + end end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/find_genes b/bp_bin/find_genes index b365ab2..a7dda28 100755 --- a/bp_bin/find_genes +++ b/bp_bin/find_genes @@ -37,30 +37,30 @@ casts = [] casts << {:long=>'full', :short=>'f', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'procedure', :short=>'p', :type=>'string', :mandatory=>true, :default=>'single', :allowed=>'single,meta', :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) - -tmpdir = bp.mktmpdir -infile = "#{tmpdir}/in.fna" -outfile = "#{tmpdir}/out.prodigal" - -Fasta.open(infile, mode="w") do |fasta_io| - bp.each_record do |record| - bp.puts record - fasta_io.puts record +options = Biopieces.options_parse(ARGV, casts) + +tmpdir = Biopieces.mktmpdir +infile = File.join(tmpdir, "in.fna") +outfile = File.join(tmpdir, "out.prodigal") + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + Fasta.open(infile, mode="w") do |fasta_io| + input.each_record do |record| + output.puts record + fasta_io.puts record + end end -end -prodigal = Prodigal.new(infile, outfile, options) -prodigal.run + prodigal = Prodigal.new(infile, outfile, options) + prodigal.run -prodigal.each do |record| - record[:DEFINITION].match /seqhdr="([^"]+)/ do |m| - record[:S_ID] = $1 - end + prodigal.each do |record| + record[:DEFINITION].match /seqhdr="([^"]+)/ do |m| + record[:S_ID] = $1 + end - bp.puts record + output.puts record + end end diff --git a/bp_bin/find_homopolymers b/bp_bin/find_homopolymers index 5c9cd24..3cad86f 100755 --- a/bp_bin/find_homopolymers +++ b/bp_bin/find_homopolymers @@ -31,23 +31,22 @@ require 'maasha/biopieces' require 'maasha/seq' -require 'pp' casts = [] casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + seq = Seq.new(nil, record[:SEQ]) -bp.each_record do |record| - if record.has_key? :SEQ - seq = Seq.new(nil, record[:SEQ]) + record[:HOMOPOL_MAX] = seq.homopol_max(options[:min]) + end - record[:HOMOPOL_MAX] = seq.homopol_max(options[:min]) + output.puts record end - - bp.puts record end diff --git a/bp_bin/find_mids b/bp_bin/find_mids index 81015f6..52da69e 100755 --- a/bp_bin/find_mids +++ b/bp_bin/find_mids @@ -30,7 +30,6 @@ require 'maasha/biopieces' -require 'pp' MID_LEN = 10 @@ -80,34 +79,35 @@ end casts = [] casts << {:long=>'pos', :short=>'p', :type=>'uint', :mandatory=>false, :default=>0, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) pos = options[:pos] -bp.each_record do |record| - if record.has_key? :SEQ - tag = record[:SEQ][pos ... pos + MID_LEN].upcase +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + tag = record[:SEQ][pos ... pos + MID_LEN].upcase - if mid_hash.has_key? tag - count_hash[tag] += 1 + if mid_hash.has_key? tag + count_hash[tag] += 1 + end end + output.puts record end - bp.puts record -end -mids.each_with_index do |mid, i| - if count_hash[mid] > 0 - record = {} - record[:REC_TYPE] = "MID" - record[:MID_NUM] = i + 1 - record[:MID_COUNT] = count_hash[mid] - record[:MID_SEQ] = mid - bp.puts record + mids.each_with_index do |mid, i| + if count_hash[mid] > 0 + record = {} + record[:REC_TYPE] = "MID" + record[:MID_NUM] = i + 1 + record[:MID_COUNT] = count_hash[mid] + record[:MID_SEQ] = mid + output.puts record + end end end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/join_seq b/bp_bin/join_seq index de80ad2..f071f41 100755 --- a/bp_bin/join_seq +++ b/bp_bin/join_seq @@ -34,28 +34,29 @@ require 'maasha/seq' casts = [] -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) seq = Seq.new(nil, "") -bp.each_record do |record| - if record.has_key? :SEQ - unless seq.seq_name - seq.seq_name = record[:SEQ_NAME] +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + unless seq.seq_name + seq.seq_name = record[:SEQ_NAME] + end + seq.seq << record[:SEQ] end - seq.seq << record[:SEQ] + + output.puts record end - bp.puts record + if seq.seq != "" + new_record = seq.to_bp + new_record[:REC_TYPE] = "JOIN" + output.puts new_record + end end -if seq.seq != "" - new_record = seq.to_bp - new_record[:REC_TYPE] = "JOIN" - bp.puts new_record -end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/kmer_freq b/bp_bin/kmer_freq index 7e412d8..290f07d 100755 --- a/bp_bin/kmer_freq +++ b/bp_bin/kmer_freq @@ -31,34 +31,34 @@ require 'maasha/biopieces' require 'maasha/seq' -require 'pp' casts = [] casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'} casts << {:long=>'type', :short=>'t', :type=>'string', :mandatory=>false, :default=>"dna", :allowed=>"dna,rna,protein", :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) oligos = Seq.generate_oligos(options[:size], options[:type]) -bp.each_record do |record| - if record.has_key? :SEQ - kmers = {} - oligos.each { |oligo| kmers[oligo.upcase] = 0 } +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + kmers = {} + oligos.each { |oligo| kmers[oligo.upcase] = 0 } + + (0 ... record[:SEQ].length - options[:size]).each do |i| + kmer = record[:SEQ][i .. i + options[:size] - 1].upcase + kmers[kmer] += 1 if kmers[kmer] + end - (0 ... record[:SEQ].length - options[:size]).each do |i| - kmer = record[:SEQ][i .. i + options[:size] - 1].upcase - kmers[kmer] += 1 if kmers[kmer] + record.merge! kmers end - record.merge! kmers + output.puts record end - - bp.puts record end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/length_seq b/bp_bin/length_seq index 0152ef4..e9f8726 100755 --- a/bp_bin/length_seq +++ b/bp_bin/length_seq @@ -33,13 +33,13 @@ require 'maasha/biopieces' casts = [] -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - record[:SEQ_LEN] = record[:SEQ].length if record.has_key? :SEQ - bp.puts record +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + record[:SEQ_LEN] = record[:SEQ].length if record.has_key? :SEQ + output.puts record + end end diff --git a/bp_bin/mask_seq b/bp_bin/mask_seq index f29b68f..011be4d 100755 --- a/bp_bin/mask_seq +++ b/bp_bin/mask_seq @@ -58,12 +58,12 @@ end casts = [] casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - bp.puts record.mask_seq!(options[:cutoff]) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record.mask_seq!(options[:cutoff]) + end end diff --git a/bp_bin/pcr_seq b/bp_bin/pcr_seq index df97f20..dec3ea0 100755 --- a/bp_bin/pcr_seq +++ b/bp_bin/pcr_seq @@ -32,7 +32,6 @@ require 'maasha/biopieces' require 'maasha/fasta' require 'maasha/seq' -require 'pp' class Pcr def initialize(tmpdir, infile, options) @@ -157,39 +156,39 @@ casts << {:long=>'forward', :short=>'f', :type=>'string', :mandatory=>true, :de casts << {:long=>'reverse', :short=>'r', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'max_dist', :short=>'m', :type=>'uint', :mandatory=>true, :default=>5000, :allowed=>nil, :disallowed=>"0"} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) -tmpdir = bp.mktmpdir +options = Biopieces.options_parse(ARGV, casts) +tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") -Fasta.open(infile, mode="w") do |ios| - bp.each_record do |record| - bp.puts record - ios.puts record +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + Fasta.open(infile, mode="w") do |ios| + input.each_record do |record| + output.puts record + ios.puts record + end end -end -outfiles = Pcr.new(tmpdir, infile, options).run - -outfiles.each do |outfile| - Fasta.open(outfile, mode="r") do |ios| - ios.each do |entry| - record = entry.to_bp - record[:REC_TYPE] = "PCR" - record[:STRAND] = "+" - record[:TYPE] = File.basename(outfile).sub(".fna", "").upcase - record[:SEQ_NAME].match(/(.+):\[(\d+),(\d+)\]$/) - record[:SEQ_NAME] = $1 - record[:PCR_BEG] = $2 - record[:PCR_END] = $3 - - if record[:PCR_BEG] > record[:PCR_END] - record[:PCR_BEG], record[:PCR_END] = record[:PCR_END], record[:PCR_BEG] - record[:STRAND] = "-" + outfiles = Pcr.new(tmpdir, infile, options).run + + outfiles.each do |outfile| + Fasta.open(outfile, mode="r") do |ios| + ios.each do |entry| + record = entry.to_bp + record[:REC_TYPE] = "PCR" + record[:STRAND] = "+" + record[:TYPE] = File.basename(outfile).sub(".fna", "").upcase + record[:SEQ_NAME].match(/(.+):\[(\d+),(\d+)\]$/) + record[:SEQ_NAME] = $1 + record[:PCR_BEG] = $2 + record[:PCR_END] = $3 + + if record[:PCR_BEG] > record[:PCR_END] + record[:PCR_BEG], record[:PCR_END] = record[:PCR_END], record[:PCR_BEG] + record[:STRAND] = "-" + end + + output.puts record end - - bp.puts record end end end diff --git a/bp_bin/plot_scores b/bp_bin/plot_scores index fd44149..59cf3e3 100755 --- a/bp_bin/plot_scores +++ b/bp_bin/plot_scores @@ -42,25 +42,25 @@ casts << {:long=>'title', :short=>'T', :type=>'string', :mandatory=>false, : casts << {:long=>'xlabel', :short=>'X', :type=>'string', :mandatory=>false, :default=>xlabel, :allowed=>nil, :disallowed=>nil} casts << {:long=>'ylabel', :short=>'Y', :type=>'string', :mandatory=>false, :default=>ylabel, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) BASE_SOLEXA = 64 sum_hash = Hash.new(0) count_hash = Hash.new(0) -bp.each_record do |record| - if record[:SCORES] - scores = record[:SCORES] - (0 ... scores.length).each do |i| - sum_hash[i] += (scores[i].ord - BASE_SOLEXA) - count_hash[i] += 1 +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SCORES] + scores = record[:SCORES] + (0 ... scores.length).each do |i| + sum_hash[i] += (scores[i].ord - BASE_SOLEXA) + count_hash[i] += 1 + end end - end - bp.puts record unless options[:no_stream] + output.puts record unless options[:no_stream] + end end x = [] diff --git a/bp_bin/progress_meter b/bp_bin/progress_meter index 7678d37..49003a2 100755 --- a/bp_bin/progress_meter +++ b/bp_bin/progress_meter @@ -35,17 +35,18 @@ casts = [] casts << {:long=>'no_stream', :short => 'x', :type => 'flag', :mandatory => false, :default => nil, :allowed => nil, :disallowed => nil} casts << {:long=>'count', :short => 'c', :type => 'uint', :mandatory => false, :default => 1000, :allowed => nil, :disallowed => '0'} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_with_index do |record, count| + output.puts record unless options.has_key? :no_stream + $stderr.printf "\n% 9d ", count if (count % (options[:count] * 100)) == 0 + $stderr.print "." if (count % options[:count]) == 0 + end -bp.each_with_index do |record, count| - bp.puts record unless options.has_key? :no_stream - $stderr.printf "\n% 9d ", count if (count % (options[:count] * 100)) == 0 - $stderr.print "." if (count % options[:count]) == 0 + $stderr.puts end -$stderr.puts # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/read_fasta b/bp_bin/read_fasta index 9b77008..b267d30 100755 --- a/bp_bin/read_fasta +++ b/bp_bin/read_fasta @@ -35,32 +35,32 @@ casts = [] casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - bp.puts record -end +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record + end -num = 0 -last = false + num = 0 + last = false -if options.has_key? :data_in - options[:data_in].each do |file| - Fasta.open(file, mode='r') do |fasta| - fasta.each do |entry| - bp.puts entry.to_bp - num += 1 + if options.has_key? :data_in + options[:data_in].each do |file| + Fasta.open(file, mode='r') do |fasta| + fasta.each do |entry| + output.puts entry.to_bp + num += 1 - if options.has_key? :num and options[:num] == num - last = true - break + if options.has_key? :num and options[:num] == num + last = true + break + end end end - end - break if last + break if last + end end end diff --git a/bp_bin/read_fastq b/bp_bin/read_fastq index 063edd6..51dd56b 100755 --- a/bp_bin/read_fastq +++ b/bp_bin/read_fastq @@ -38,34 +38,34 @@ casts << {:long=>'solexa', :short=>'s', :type=>'flag', :mandatory=>false, :de PHRED_SCORES = Regexp.new('[!"#$%&\'()*+,-./0123456789:]') -bp = Biopieces.new - -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - bp.puts record -end +options = Biopieces.options_parse(ARGV, casts) num = 0 last = false -if options.has_key? :data_in - options[:data_in].each do |file| - Fastq.open(file, mode='r') do |fastq| - fastq.each do |entry| - entry.convert_phred2illumina! if entry.qual.match PHRED_SCORES - entry.convert_solexa2illumina! if options[:solexa] - bp.puts entry.to_bp - num += 1 - - if options.has_key? :num and options[:num] == num - last = true - break +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record + end + + if options.has_key? :data_in + options[:data_in].each do |file| + Fastq.open(file, mode='r') do |fastq| + fastq.each do |entry| + entry.convert_phred2illumina! if entry.qual.match PHRED_SCORES + entry.convert_solexa2illumina! if options[:solexa] + output.puts entry.to_bp + num += 1 + + if options.has_key? :num and options[:num] == num + last = true + break + end end end - end - break if last + break if last + end end end diff --git a/bp_bin/read_genbank b/bp_bin/read_genbank index 1ab92e4..d40f1aa 100755 --- a/bp_bin/read_genbank +++ b/bp_bin/read_genbank @@ -38,37 +38,38 @@ casts << {:long=>'keys', :short=>'k', :type=>'list', :mandatory=>false, casts << {:long=>'features', :short=>'f', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'qualifiers', :short=>'q', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) hash_keys = options[:keys].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:keys] hash_feats = options[:features].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:features] hash_quals = options[:qualifiers].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:qualifiers] -bp.each_record do |record| - bp.puts record -end -num = 0 -last = false +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record + end + + num = 0 + last = false -if options.has_key? :data_in - options[:data_in].each do |file| - Genbank.open(file, mode='r') do |gb| - gb.each(hash_keys, hash_feats, hash_quals) do |entry| - bp.puts entry + if options.has_key? :data_in + options[:data_in].each do |file| + Genbank.open(file, mode='r') do |gb| + gb.each(hash_keys, hash_feats, hash_quals) do |entry| + output.puts entry - num += 1 + num += 1 - if options.has_key? :num and options[:num] == num - last = true - break + if options.has_key? :num and options[:num] == num + last = true + break + end end end - end - break if last + break if last + end end end diff --git a/bp_bin/read_sff b/bp_bin/read_sff index 6e5febd..3de8b2e 100755 --- a/bp_bin/read_sff +++ b/bp_bin/read_sff @@ -37,34 +37,34 @@ casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :de casts << {:long=>'mask', :short=>'m', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'clip', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - bp.puts record -end +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record + end -num = 0 -last = false - -if options.has_key? :data_in - options[:data_in].each do |file| - SFF.open(file, mode='r') do |sff| - sff.each do |entry| - entry.mask if options[:mask] - entry.clip if options[:clip] - bp.puts entry.to_bp - num += 1 - - if options.has_key? :num and options[:num] == num - last = true - break + num = 0 + last = false + + if options.has_key? :data_in + options[:data_in].each do |file| + SFF.open(file, mode='r') do |sff| + sff.each do |entry| + entry.mask if options[:mask] + entry.clip if options[:clip] + output.puts entry.to_bp + num += 1 + + if options.has_key? :num and options[:num] == num + last = true + break + end end end - end - break if last + break if last + end end end diff --git a/bp_bin/remove_mids b/bp_bin/remove_mids index a2d1fd8..44714f8 100755 --- a/bp_bin/remove_mids +++ b/bp_bin/remove_mids @@ -79,28 +79,29 @@ end casts = [] casts << {:long=>'pos', :short=>'p', :type=>'uint', :mandatory=>false, :default=>0, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) pos = options[:pos] -bp.each_record do |record| - if record.has_key? :SEQ - tag = record[:SEQ][pos ... pos + MID_LEN].upcase - - if mid_hash[tag] - record[:SEQ] = record[:SEQ][pos + MID_LEN ... record[:SEQ].length] - record[:SCORES] = record[:SCORES][pos + MID_LEN ... record[:SCORES].length] if record[:SCORES] - record[:MID] = tag - record[:MID_NUM] = mid_hash[tag] + 1 - record[:SEQ_LEN] = record[:SEQ].length +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + tag = record[:SEQ][pos ... pos + MID_LEN].upcase + + if mid_hash[tag] + record[:SEQ] = record[:SEQ][pos + MID_LEN ... record[:SEQ].length] + record[:SCORES] = record[:SCORES][pos + MID_LEN ... record[:SCORES].length] if record[:SCORES] + record[:MID] = tag + record[:MID_NUM] = mid_hash[tag] + 1 + record[:SEQ_LEN] = record[:SEQ].length + end end - end - bp.puts record + output.puts record + end end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/scores_to_dec b/bp_bin/scores_to_dec index c5e7c1e..56381a5 100755 --- a/bp_bin/scores_to_dec +++ b/bp_bin/scores_to_dec @@ -50,14 +50,12 @@ class Hash end end -casts = [] +options = Biopieces.options_parse(ARGV) -bp = Biopieces.new - -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - bp.puts record.scores2dec! +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + output.puts record.scores2dec! + end end diff --git a/bp_bin/shred_seq b/bp_bin/shred_seq index 86bf321..5b2a886 100755 --- a/bp_bin/shred_seq +++ b/bp_bin/shred_seq @@ -79,21 +79,22 @@ casts = [] casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>true, :default=>500, :allowed=>nil, :disallowed=>'0'} casts << {:long=>'coverage', :short=>'c', :type=>'uint', :mandatory=>true, :default=>100, :allowed=>nil, :disallowed=>'0'} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ and record[:SEQ].length >= options[:size] + entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES]) + entry.type = 'dna' -bp.each_record do |record| - if record.has_key? :SEQ and record[:SEQ].length >= options[:size] - entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES]) - entry.type = 'dna' - - entry.shred(options[:size], options[:coverage]) do |subentry| - bp.puts subentry.to_bp + entry.shred(options[:size], options[:coverage]) do |subentry| + output.puts subentry.to_bp + end end end end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_bin/shuffle_records b/bp_bin/shuffle_records index 194cab0..ae62a31 100755 --- a/bp_bin/shuffle_records +++ b/bp_bin/shuffle_records @@ -31,20 +31,18 @@ require 'maasha/biopieces' -casts = [] - -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV) records = [] -bp.each_record do |record| - records << record -end +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + records << record + end -records.shuffle.each do |record| - bp.puts record + records.shuffle.each do |record| + output.puts record + end end diff --git a/bp_bin/swapcase_seq b/bp_bin/swapcase_seq index 60725f5..bc1cded 100755 --- a/bp_bin/swapcase_seq +++ b/bp_bin/swapcase_seq @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -24,22 +24,20 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Swap lowercase sequence to uppercase and visa versa for all sequences in the stream. +# Swap lower case sequence to uppercase and visa versa for all sequences in the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'maasha/biopieces' -casts = [] +options = Biopieces.options_parse(ARGV) -bp = Biopieces.new - -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - record[:SEQ].swapcase! if record.has_key? :SEQ - bp.puts record +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + record[:SEQ].swapcase! if record.has_key? :SEQ + output.puts record + end end diff --git a/bp_bin/uclust_seq b/bp_bin/uclust_seq index 76c5668..4523ab0 100755 --- a/bp_bin/uclust_seq +++ b/bp_bin/uclust_seq @@ -142,34 +142,34 @@ casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :d casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil} casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +tmpdir = Biopieces.mktmpdir +infile = File.join(tmpdir, "in.fna") +outfile = File.join(tmpdir, "out.uc") -tmpdir = bp.mktmpdir -infile = "#{tmpdir}/in.fna" -outfile = "#{tmpdir}/out.uc" - -Fasta.open(infile, mode="w") do |fasta_io| - bp.each_record do |record| - bp.puts record - fasta_io.puts record +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + Fasta.open(infile, mode="w") do |fasta_io| + input.each_record do |record| + output.puts record + fasta_io.puts record + end end -end -uclust = Uclust.new(infile, outfile, options) -uclust.sort unless options[:no_sort] + uclust = Uclust.new(infile, outfile, options) + uclust.sort unless options[:no_sort] -case options[:method].to_s -when "ublast" then uclust.ublast -when "usearch" then uclust.usearch -when "uclust" then uclust.uclust -when "usearch_uclust" then uclust.usearch_uclust -else raise "Unknown method: #{options[:method]}" -end + case options[:method].to_s + when "ublast" then uclust.ublast + when "usearch" then uclust.usearch + when "uclust" then uclust.uclust + when "usearch_uclust" then uclust.usearch_uclust + else raise "Unknown method: #{options[:method]}" + end -uclust.each do |record| - bp.puts record + uclust.each do |record| + output.puts record + end end diff --git a/bp_test/out/analyze_assembly.out.1 b/bp_test/out/analyze_assembly.out.1 index c47d842..b973713 100644 --- a/bp_test/out/analyze_assembly.out.1 +++ b/bp_test/out/analyze_assembly.out.1 @@ -18,10 +18,3 @@ SEQ_NAME: test5 SEQ: ATGCACATTGATGCACATTGATGCACATTGATGCACATTGATGCACATTG SEQ_LEN: 50 --- -N50: 40 -MAX: 50 -MIN: 10 -MEAN: 30 -TOTAL: 150 -COUNT: 5 ---- diff --git a/bp_test/out/calc_N50.out.1 b/bp_test/out/calc_N50.out.1 index 6e4ddd2..b973713 100644 --- a/bp_test/out/calc_N50.out.1 +++ b/bp_test/out/calc_N50.out.1 @@ -18,5 +18,3 @@ SEQ_NAME: test5 SEQ: ATGCACATTGATGCACATTGATGCACATTGATGCACATTGATGCACATTG SEQ_LEN: 50 --- -N50: 40 ---- diff --git a/code_ruby/lib/maasha/biopieces.rb b/code_ruby/lib/maasha/biopieces.rb index ca23531..c1dc49b 100644 --- a/code_ruby/lib/maasha/biopieces.rb +++ b/code_ruby/lib/maasha/biopieces.rb @@ -1,6 +1,6 @@ raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9" -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -24,11 +24,34 @@ raise "Ruby 1.9 or later required" if RUBY_VERSION < "1.9" # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'fileutils' require 'date' +require 'fileutils' require 'optparse' -require 'open3' require 'pp' +require 'stringio' +require 'zlib' + +TEST = false + +# Monkey patch (evil) changing the to_s method of the Hash class to +# return a Hash in Biopieces format; keys and value pairs on one line +# each seperated with ': ' and terminated by a line of '---'. +class Hash + def to_s + string = "" + + self.each do |key, value| + string << "#{key.to_s}: #{value}\n" + end + + string << "---\n" + + string + end +end + +# Error class for all exceptions to do with the Biopieces class. +class BiopiecesError < StandardError; end # Biopieces are command line scripts and uses OptionParser to parse command line # options according to a list of casts. Each cast prescribes the long and short @@ -42,37 +65,67 @@ require 'pp' class Biopieces include Enumerable - attr_accessor :out # accessor for out stream _ios_ + # Class method to check the integrity of a list of casts, followed by parsing + # options from argv and finally checking the options according to the casts. + def self.options_parse(argv, cast_list=[], script_path=$0) + casts = Casts.new(cast_list) + option_handler = OptionHandler.new(argv, casts, script_path) + options = option_handler.options_parse - # Initialize a Biopiece and write the status to file. - # Options are for testing purposes only. - def initialize(test=nil, input=STDIN, output=STDOUT) - @test = test - @input = input - @output = output + options end - # Check the integrity of a list of casts, followed by parsion options from argv - # and finally checking the options according to the casts. Returns nil if argv - # is empty, otherwise an options hash. - def parse(argv, cast_list=[], script_path=$0) - casts = Casts.new(cast_list) - option_handler = OptionHandler.new(argv, casts, script_path, @test) - @options = option_handler.options_parse + # Class method for opening data streams for reading and writing Biopiece + # records. Records are read from STDIN (default) or file (possibly gzipped) + # and written to STDOUT (default) or file. + def self.open(input = STDIN, output = STDOUT) + input = self.open_input(input) + output = self.open_output(output) - @in = Stream.open(@options, mode="r", @input) + if block_given? + begin + yield input, output + ensure + input.close + output.close + end + else + return input, output + end + end - @options + # Class method to create a temporary directory inside the ENV["BP_TMP"] directory. + def self.mktmpdir + time = Time.now.to_i + user = ENV["USER"] + pid = $$ + path = File.join(ENV["BP_TMP"], [user, time + pid, pid, "bp_tmp"].join("_")) + Dir.mkdir(path) + Status.new.set_tmpdir(path) + path + end + + # Initialize a Biopiece object for either reading or writing from _ios_. + def initialize(ios, stdio = nil) + @ios = ios + @stdio = stdio + end + + # Method to write a Biopiece record to _ios_. + def puts(foo) + @ios << foo.to_s + end + + # Method to close _ios_. + def close + @ios.close unless @stdio end - # Open Biopiece input stream if not open and iterate over all Biopiece - # records in the stream. + # Method to parse and yield a Biopiece record from _ios_. def each_record - return if @in.nil? - record = {} - @in.each_line do |line| + @ios.each_line do |line| case line when /^([^:]+): (.*)$/ record[$1.to_sym] = $2 @@ -80,7 +133,7 @@ class Biopieces yield record unless record.empty? record = {} else - raise "Bad record format: #{line}" + raise BiopiecesError, "Bad record format: #{line}" end end @@ -91,29 +144,42 @@ class Biopieces alias :each :each_record - # Open Biopiece output stream if not open and puts record to the stream. - def puts(record) - @out = Stream.open(@options, mode="w", @output) unless @out.is_a? IO + private + + # Class method for opening data stream for reading Biopiece records. + # Records are read from STDIN (default) or file (possibly gzipped). + def self.open_input(input) + if STDIN.tty? + if input.nil? + input = self.new(StringIO.new) + else + ios = File.open(input, mode='r') + + begin + ios = Zlib::GzipReader.new(ios) + rescue + ios.rewind + end - record.each do |key,value| - @out.print "#{key.to_s}: #{value}\n" + input = self.new(ios) + end + else + input = self.new(STDIN, stdio = true) end - @out.print "---\n" + input end - def to_s - end + # Class method for opening data stream for writing Biopiece records. + # Records are written to STDOU (default) or file. + def self.open_output(output) + if output.nil? + output = self.new(STDOUT, stdio = true) + elsif not output.is_a? IO + output = self.new(File.open(output, mode='w')) + end - # Create a temporary directory inside the ENV["BP_TMP"] dir. - def mktmpdir - time = Time.now.to_i - user = ENV["USER"] - pid = $$ - path = ENV["BP_TMP"] + "/" + [user, time + pid, pid, "bp_tmp"].join("_") - Dir.mkdir(path) - Status.new.set_tmpdir(path) - path + output end end @@ -121,7 +187,6 @@ end # Error class for all exceptions to do with option casts. class CastError < StandardError; end - # Class to handle casts of command line options. Each cast prescribes the long and # short name of the option, the type, if it is mandatory, the default value, and # allowed and disallowed values. An optional list of extra casts can be supplied, @@ -145,7 +210,7 @@ class Casts < Array # Add ubiquitous options casts. def ubiquitous @cast_list << {:long=>'help', :short=>'?', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - @cast_list << {:long=>'stream_in', :short=>'I', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + @cast_list << {:long=>'stream_in', :short=>'I', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} @cast_list << {:long=>'stream_out', :short=>'O', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} @cast_list << {:long=>'verbose', :short=>'v', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} end @@ -253,7 +318,6 @@ class Casts < Array cast[:long] = cast[:long].to_sym end end - end @@ -266,11 +330,10 @@ class OptionHandler REGEX_INT = /^(int|uint)$/ REGEX_STRING = /^(file|file!|dir|dir!|genome)$/ - def initialize(argv, casts, script_path, test=nil) + def initialize(argv, casts, script_path) @argv = argv @casts = casts @script_path = script_path - @test = test end # Parse options from argv using OptionParser and casts denoting long and @@ -315,7 +378,7 @@ class OptionHandler option_parser.parse!(@argv) if print_usage_full? - print_usage_and_exit(true) + print_usage_and_exit(full=true) elsif print_usage_short? print_usage_and_exit end @@ -341,7 +404,7 @@ class OptionHandler # Check if short "usage info" should be printed. def print_usage_short? - if not $stdin.tty? + if not STDIN.tty? return false elsif @options[:stream_in] return false @@ -358,7 +421,7 @@ class OptionHandler # using a system() call and exit. An optional 'full' flag # outputs the full usage info. def print_usage_and_exit(full=nil) - if @test + if TEST return else if full @@ -547,7 +610,7 @@ class Status elap = time_diff(time0, time1) command = [script, args].join(" ") - log_file = ENV["BP_LOG"] + "/biopieces.log" + log_file = File.join(ENV["BP_LOG"], "biopieces.log") File.open(log_file, mode = "a") { |file| file.puts [time0, time1, elap, user, exit_status, command].join("\t") } end @@ -564,7 +627,7 @@ class Status user = ENV["USER"] script = File.basename($0) pid = $$ - path = ENV["BP_TMP"] + "/" + [user, script, pid, "status"].join(".") + path = File.join(ENV["BP_TMP"], [user, script, pid, "status"].join(".")) end # Get the elapsed time from the difference between two time stamps. @@ -574,87 +637,6 @@ class Status end -class Stream < IO - # Open Biopieces output data stream for reading from stdin or a file - # specified in options[:stream_in] OR writing to stdout or a file - # specified in options[:stream_out] or options[:data_out]. - def self.open(options, mode, stdio) - if mode == "r" - if options[:data_in] and options[:data_in].first == "-" - self.nread(["-"]) - else - $stdin.tty? ? read(options[:stream_in]) : stdio - end - elsif mode == "w" - options[:stream_out] ? self.write(options[:stream_out], options[:compress]) : stdio - else - raise "Bad mode #{mode}" - end - end - - private - - # Opens a reads stream to a list of files. - def self.read(files) - return if files.nil? #TODO case/when - self.zipped?(files) ? self.zread(files) : self.nread(files) - end - - # Opens a write stream to a file and returns a _io_ object. - def self.write(file, zip=nil) - zip ? self.zwrite(file) : self.nwrite(file) - end - - # Opens a list of gzipped files for reading and return an _io_ object. - def self.zread(files) - stdin, stdout, stderr = Open3.popen3("zcat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for gzipped writing and return an _io_ object. - def self.zwrite(file) - stdin, stdout, stderr = Open3.popen3("gzip -f > #{file}") - stderr.close - stdout.close - stdin - end - - # Opens a list of files for reading and return an _io_ object. - def self.nread(files) - stdin, stdout, stderr = Open3.popen3("cat " + files.join(' ')); - stdin.close - stderr.close - stdout - end - - # Opens a file for writing and return an _io_ object. - def self.nwrite(file) - File.open(file, mode="w") - end - - # Test if a list of files are gzipped or not. - # Raises if files are mixed zipped and unzipped. - def self.zipped?(files) - type_hash = {} - - files.each do |file| - type = `file #{file}` - - if type =~ /gzip compressed/ - type_hash[:gzip] = true - else - type_hash[:ascii] = true - end - end - - raise "Mixture of zipped and unzipped files" if type_hash.size == 2 - - type_hash[:gzip] - end -end - # Set status when 'biopieces' is required. Status.new.set @@ -682,3 +664,4 @@ end __END__ + diff --git a/code_ruby/test/maasha/test_biopieces.rb b/code_ruby/test/maasha/test_biopieces.rb index f5204c8..86da311 100755 --- a/code_ruby/test/maasha/test_biopieces.rb +++ b/code_ruby/test/maasha/test_biopieces.rb @@ -1,5 +1,4 @@ #!/usr/bin/env ruby -$:.unshift File.join(File.dirname(__FILE__),'..','lib') # Copyright (C) 2007-2010 Martin A. Hansen. @@ -30,179 +29,175 @@ require 'maasha/biopieces' require 'stringio' require 'pp' +Biopieces::TEST = true +OptionHandler::TEST = true + TYPES = %w[flag string list int uint float file file! files files! dir dir! genome] DUMMY_FILE = __FILE__ SCRIPT_PATH = "write_fasta" class BiopiecesTest < Test::Unit::TestCase - - def setup - @input = StringIO.new - @output = StringIO.new - @bp = Biopieces.new(true, @input, @output) - end - # >>>>>>>>>>>>>>>>>>>> Testing Options.new <<<<<<<<<<<<<<<<<<<< - def test_Biopieces_parse_with_all_cast_keys_dont_raise + def test_Biopieces_options_parse_with_all_cast_keys_dont_raise argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end - def test_Biopieces_parse_with_illegal_long_cast_values_raises + def test_Biopieces_options_parse_with_illegal_long_cast_values_raises [nil, true, false, 1, 0, "a"].each do |long| argv = [] casts = [{:long=>long, :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_long_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_long_cast_values_dont_raise ["foo", "!!", "0123"].each do |long| argv = [] casts = [{:long=>long, :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_illegal_short_cast_values_raises + def test_Biopieces_options_parse_with_illegal_short_cast_values_raises [nil, true, false, "foo"].each do |short| argv = [] casts = [{:long=>"foo", :short=>short, :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_short_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_short_cast_values_dont_raise ["!", "1", "a"].each do |short| argv = [] casts = [{:long=>"foo", :short=>short, :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_illegal_type_cast_values_raises + def test_Biopieces_options_parse_with_illegal_type_cast_values_raises [nil, true, false, "foo", 12, 0].each do |type| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>type, :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_type_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_type_cast_values_dont_raise TYPES.each do |type| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>type, :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_illegal_mandatory_cast_values_raises + def test_Biopieces_options_parse_with_illegal_mandatory_cast_values_raises ["yes", 12, 0, nil].each do |mandatory| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>mandatory, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_mandatory_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_mandatory_cast_values_dont_raise [true, false].each do |mandatory| argv = [ "--foo", "1" ] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>mandatory, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_illegal_default_cast_values_raises + def test_Biopieces_options_parse_with_illegal_default_cast_values_raises [true, false, [], {}].each do |default| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>default, :allowed=>nil, :disallowed=>nil}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_default_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_default_cast_values_dont_raise [nil, 0, 1, -1].each do |default| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>default, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_illegal_allowed_cast_values_raises + def test_Biopieces_options_parse_with_illegal_allowed_cast_values_raises [true, false, {}, [], 0, 0.1].each do |allowed| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>allowed, :disallowed=>nil}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_allowed_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_allowed_cast_values_dont_raise ["foo,bar,0",nil].each do |allowed| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>allowed, :disallowed=>nil}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_illegal_disallowed_cast_values_raises + def test_Biopieces_options_parse_with_illegal_disallowed_cast_values_raises [true, false, {}, [], 0, 0.1].each do |disallowed| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>disallowed}] - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_legal_disallowed_cast_values_dont_raise + def test_Biopieces_options_parse_with_legal_disallowed_cast_values_dont_raise ["foo,bar,0",nil].each do |disallowed| argv = [] casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>disallowed}] - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end end - def test_Biopieces_parse_with_duplicate_long_cast_values_raises + def test_Biopieces_options_parse_with_duplicate_long_cast_values_raises argv = [] casts = [] casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>"foo", :short=>"b", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end - def test_Biopieces_parse_with_duplicate_short_cast_values_raises + def test_Biopieces_options_parse_with_duplicate_short_cast_values_raises argv = [] casts = [] casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>"bar", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - assert_raise(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_raise(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end - def test_Biopieces_parse_without_duplicate_long_and_short_cast_values_dont_raise + def test_Biopieces_options_parse_without_duplicate_long_and_short_cast_values_dont_raise argv = [] casts = [] casts << {:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>"bar", :short=>"b", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - assert_nothing_raised(CastError) { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised(CastError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end # >>>>>>>>>>>>>>>>>>>> Testing Options.parse <<<<<<<<<<<<<<<<<<<< - def test_Biopieces_parse_with_empty_argv_and_missing_wiki_file_raises + def test_Biopieces_options_parse_with_empty_argv_and_missing_wiki_file_raises argv = [] casts = [] - assert_raise(RuntimeError) { @bp.parse(argv,casts, "foo") } + assert_raise(RuntimeError) { Biopieces.options_parse(argv,casts, "foo") } end - def test_Biopieces_parse_with_empty_argv_and_existing_wiki_file_dont_raise + def test_Biopieces_options_parse_with_empty_argv_and_existing_wiki_file_dont_raise argv = [] casts = [] - assert_nothing_raised { @bp.parse(argv, casts, SCRIPT_PATH) } + assert_nothing_raised { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } end - def test_Biopieces_parse_with_help_in_argv_and_existing_wiki_output_long_usage + def test_Biopieces_options_parse_with_help_in_argv_and_existing_wiki_output_long_usage argv = ["--help"] - assert_nothing_raised { @bp.parse(argv,[],SCRIPT_PATH) } + assert_nothing_raised { Biopieces.options_parse(argv,[],SCRIPT_PATH) } end # # FIXME This one fails because any argument to a flag is ignored and the flag value is set to true. Should it raise? @@ -212,46 +207,46 @@ class BiopiecesTest < Test::Unit::TestCase # assert_raise(ArgumentError) { opt_parser.parse(["--foo", "bar"],SCRIPT_PATH) } # end - def test_Biopieces_parse_with_stream_in_argv_returns_correct_options + def test_Biopieces_options_parse_with_stream_in_argv_returns_correct_options argv = ["--stream_in", DUMMY_FILE] - options = @bp.parse(argv,[],SCRIPT_PATH) - assert_equal([DUMMY_FILE], options[:stream_in]) - end - - def test_Biopieces_parse_with_I_argv_returns_correct_options - argv = ["-I", DUMMY_FILE] - casts = [] - options = @bp.parse(argv, casts, SCRIPT_PATH) - assert_equal([DUMMY_FILE], options[:stream_in]) - end - - def test_Biopieces_parse_use_cast_default_value_if_no_argument_given - argv = ["-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}] - options = @bp.parse(argv, casts, SCRIPT_PATH) - assert_equal(options[:foo], "bar") - end - - def test_Biopieces_parse_dont_use_default_value_if_argument_given - argv = ["--foo", "bleh", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}] - options = @bp.parse(argv, casts, SCRIPT_PATH) - assert_equal(options[:foo], "bleh".to_sym) - end - - def test_Biopieces_parse_with_mandatory_cast_and_no_argument_raises - argv = ["-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(ArgumentError) { @bp.parse(argv,casts,SCRIPT_PATH) } - end - - def test_Biopieces_parse_with_mandatory_cast_and_argument_dont_raise - argv = ["--foo", "bar", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv,casts,SCRIPT_PATH) } - end - -# # This one results in an error: "OptionParser::InvalidArgument: invalid argument: --foo bar" + options = Biopieces.options_parse(argv,[],SCRIPT_PATH) + assert_equal(DUMMY_FILE, options[:stream_in]) + end + + def test_Biopieces_options_parse_with_I_argv_returns_correct_options + argv = ["-I", DUMMY_FILE] + casts = [] + options = Biopieces.options_parse(argv, casts, SCRIPT_PATH) + assert_equal(DUMMY_FILE, options[:stream_in]) + end + + def test_Biopieces_options_parse_use_cast_default_value_if_no_argument_given + argv = ["-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}] + options = Biopieces.options_parse(argv, casts, SCRIPT_PATH) + assert_equal(options[:foo], "bar") + end + + def test_Biopieces_options_parse_dont_use_default_value_if_argument_given + argv = ["--foo", "bleh", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>"bar", :allowed=>nil, :disallowed=>nil}] + options = Biopieces.options_parse(argv, casts, SCRIPT_PATH) + assert_equal(options[:foo], "bleh".to_sym) + end + + def test_Biopieces_options_parse_with_mandatory_cast_and_no_argument_raises + argv = ["-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(ArgumentError) { Biopieces.options_parse(argv,casts,SCRIPT_PATH) } + end + + def test_Biopieces_options_parse_with_mandatory_cast_and_argument_dont_raise + argv = ["--foo", "bar", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv,casts,SCRIPT_PATH) } + end + + # # This one results in an error: "OptionParser::InvalidArgument: invalid argument: --foo bar" # # So it appears that this is tested in OptionParser already. # test "Options.parse with type cast int and non-int value raises" do # ["bar" ].each do |val| # what about nil, false, true, [], {}, 0.1 ? @@ -261,98 +256,98 @@ class BiopiecesTest < Test::Unit::TestCase # end # end - def test_Biopieces_parse_with_type_cast_int_dont_raise - [0,-1,1,327649123746293746374276347824].each do |val| - argv = ["--foo", "#{val}", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv,casts,SCRIPT_PATH) } - end - end - - # TODO similar test for uint as "test "Options.parse with type cast int and non-int value raises" do" - - def test_Biopieces_parse_with_type_cast_uint_dont_raise - [0,1,327649123746293746374276347824].each do |val| - argv = ["--foo", "#{val}", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"uint", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - end - - def test_Biopieces_parse_with_file_cast_and_file_dont_exists_raises - argv = ["--foo", "bleh", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - - def test_Biopieces_parse_with_file_cast_and_existing_file_dont_raise - argv = ["--foo", DUMMY_FILE, "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - - def test_Biopieces_parse_with_files_cast_and_a_file_dont_exists_raises - argv = ["--foo", DUMMY_FILE + ",bleh", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - - def test_Biopieces_parse_with_files_cast_and_files_exists_dont_raise - argv = ["--foo", DUMMY_FILE + "," + DUMMY_FILE, "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - - # TODO replace the absolute part below the file location with File.dirname(__FILE__) - def test_Biopieces_parse_with_glob_argument_expands_correctly - flunk("This test is flawed and need fixing") - argv = ["--foo", "/Users/maasha/unit_test/foo*,/Users/maasha/unit_test/my_dir/*.fna", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - options = @bp.parse(argv, casts, SCRIPT_PATH) - assert_equal(["/Users/maasha/unit_test/foo.fna", "/Users/maasha/unit_test/my_dir/bar.fna"], options[:foo]) - end - - def test_Biopieces_parse_with_dir_cast_and_dir_dont_exists_raises - argv = ["--foo", "bleh", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_raise(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - - def test_Biopieces_parse_with_dir_cast_and_dir_exists_dont_raise - argv = ["--foo", "/", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - - def test_Biopieces_parse_with_allowed_cast_and_not_allowed_value_raises - ["bleh", "2", "3.3"].each do |val| - argv = ["--foo", "#{val}", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}] - assert_raise(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - end - - def test_Biopieces_parse_with_allowed_cast_and_allowed_values_dont_raise - ["0", "-1", "0.0", "1", "bar"].each do |val| - argv = ["--foo", "#{val}", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - end - - def test_Biopieces_parse_with_disallowed_cast_and_disallowed_value_raises - ["0", "-1", "0.0", "1", "bar"].each do |val| - argv = ["--foo", "#{val}", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}] - assert_raise(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - end - - def test_Biopieces_parse_with_disallowed_cast_and_allowed_values_dont_raise - ["bleh", "2", "3.3"].each do |val| - argv = ["--foo", "#{val}", "-I", DUMMY_FILE] - casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}] - assert_nothing_raised(ArgumentError) { @bp.parse(argv, casts, SCRIPT_PATH) } - end - end + def test_Biopieces_options_parse_with_type_cast_int_dont_raise + [0,-1,1,327649123746293746374276347824].each do |val| + argv = ["--foo", "#{val}", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"int", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv,casts,SCRIPT_PATH) } + end + end + + # TODO similar test for uint as "test "Options.parse with type cast int and non-int value raises" do" + + def test_Biopieces_options_parse_with_type_cast_uint_dont_raise + [0,1,327649123746293746374276347824].each do |val| + argv = ["--foo", "#{val}", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"uint", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + end + + def test_Biopieces_options_parse_with_file_cast_and_file_dont_exists_raises + argv = ["--foo", "bleh", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + + def test_Biopieces_options_parse_with_file_cast_and_existing_file_dont_raise + argv = ["--foo", DUMMY_FILE, "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"file!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + + def test_Biopieces_options_parse_with_files_cast_and_a_file_dont_exists_raises + argv = ["--foo", DUMMY_FILE + ",bleh", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + + def test_Biopieces_options_parse_with_files_cast_and_files_exists_dont_raise + argv = ["--foo", DUMMY_FILE + "," + DUMMY_FILE, "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + +# # TODO replace the absolute part below the file location with File.dirname(__FILE__) +# def test_Biopieces_options_parse_with_glob_argument_expands_correctly +# flunk("This test is flawed and need fixing") +# argv = ["--foo", "/Users/maasha/unit_test/foo*,/Users/maasha/unit_test/my_dir/*.fna", "-I", DUMMY_FILE] +# casts = [{:long=>"foo", :short=>"f", :type=>"files!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] +# options = Biopieces.options_parse(argv, casts, SCRIPT_PATH) +# assert_equal(["/Users/maasha/unit_test/foo.fna", "/Users/maasha/unit_test/my_dir/bar.fna"], options[:foo]) +# end + + def test_Biopieces_options_parse_with_dir_cast_and_dir_dont_exists_raises + argv = ["--foo", "bleh", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_raise(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + + def test_Biopieces_options_parse_with_dir_cast_and_dir_exists_dont_raise + argv = ["--foo", "/", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"dir!", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + + def test_Biopieces_options_parse_with_allowed_cast_and_not_allowed_value_raises + ["bleh", "2", "3.3"].each do |val| + argv = ["--foo", "#{val}", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}] + assert_raise(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + end + + def test_Biopieces_options_parse_with_allowed_cast_and_allowed_values_dont_raise + ["0", "-1", "0.0", "1", "bar"].each do |val| + argv = ["--foo", "#{val}", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>"0,-1,0.0,1,bar", :disallowed=>nil}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + end + + def test_Biopieces_options_parse_with_disallowed_cast_and_disallowed_value_raises + ["0", "-1", "0.0", "1", "bar"].each do |val| + argv = ["--foo", "#{val}", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}] + assert_raise(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + end + + def test_Biopieces_options_parse_with_disallowed_cast_and_allowed_values_dont_raise + ["bleh", "2", "3.3"].each do |val| + argv = ["--foo", "#{val}", "-I", DUMMY_FILE] + casts = [{:long=>"foo", :short=>"f", :type=>"string", :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0,-1,0.0,1,bar"}] + assert_nothing_raised(ArgumentError) { Biopieces.options_parse(argv, casts, SCRIPT_PATH) } + end + end end -- 2.39.5