X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fanalyze_assembly;h=614a9e54d8e85bc00e84e2a6deab96049c0ddb5d;hb=2f0fd91b461033529a4a72e161bd133252a22eb6;hp=610d7e194a1748f985dfeb1f27fea73fe33355c3;hpb=0be23a86e418adce44df86123d17ea5a9552e369;p=biopieces.git diff --git a/bp_bin/analyze_assembly b/bp_bin/analyze_assembly index 610d7e1..614a9e5 100755 --- a/bp_bin/analyze_assembly +++ b/bp_bin/analyze_assembly @@ -29,29 +29,49 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'biopieces' +require 'maasha/biopieces' +require 'maasha/fasta' +require 'maasha/prodigal' require 'pp' casts = [] -casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'gene_cov', :short=>'g', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'procedure', :short=>'p', :type=>'string', :mandatory=>true, :default=>'single', :allowed=>'single,meta', :disallowed=>nil} +casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -bp = Biopieces.new +options = Biopieces.options_parse(ARGV, casts) -options = bp.parse(ARGV, casts) +options[:full] = true; total = 0 lengths = [] - -bp.each_record do |record| - bp.puts record unless options[:no_stream] - - if record.has_key? :SEQ - total += record[:SEQ].length - lengths << record[:SEQ].length +tmpdir = Biopieces.mktmpdir +infile = File.join(tmpdir, "in.fna") +outfile = File.join(tmpdir, "out.prodigal") + +Fasta.open(infile, "w") do |fasta_output| + Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] + seq = Seq.new_bp(record) + + total += record[:SEQ].length + lengths << record[:SEQ].length + end + + output.puts record unless options[:no_stream] + fasta_output.puts seq.to_fasta + end end end +if options[:gene_cov] + prodigal = Prodigal.new(infile, outfile, options) + prodigal.run + gene_cov = prodigal.coverage +end + count = 0 n50 = 0 @@ -64,17 +84,19 @@ lengths.sort.reverse.each do |length| end end -bp.out = Stream.write(options[:data_out]) if options[:data_out] - new_record = {} -new_record[:N50] = n50 -new_record[:MAX] = lengths.max -new_record[:MIN] = lengths.min -new_record[:MEAN] = (total.to_f / lengths.size.to_f).to_i -new_record[:TOTAL] = total -new_record[:COUNT] = lengths.size - -bp.puts new_record +new_record[:N50] = n50 +new_record[:MAX] = lengths.max +new_record[:MIN] = lengths.min +new_record[:MEAN] = (total.to_f / lengths.size.to_f).to_i +new_record[:TOTAL] = total +new_record[:COUNT] = lengths.size +new_record[:GENE_COV] = gene_cov if options[:gene_cov] + +Biopieces.open(nil, options[:data_out]) do |input, output| + output.puts new_record +end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<