From: martinahansen Date: Mon, 9 May 2011 10:32:57 +0000 (+0000) Subject: added gene coverage to analyze_assembly X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=7c9d00c225de4c434f1ad292b6764bae9a05b73c;p=biopieces.git added gene coverage to analyze_assembly git-svn-id: http://biopieces.googlecode.com/svn/trunk@1386 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/analyze_assembly b/bp_bin/analyze_assembly index 610d7e1..d6b65a6 100755 --- a/bp_bin/analyze_assembly +++ b/bp_bin/analyze_assembly @@ -30,9 +30,12 @@ require 'biopieces' +require 'fasta' +require 'prodigal' require 'pp' casts = [] +casts << {:long=>'gene_cov', :short=>'g', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} @@ -40,18 +43,32 @@ bp = Biopieces.new options = bp.parse(ARGV, casts) +options[:full] = true; + total = 0 lengths = [] - -bp.each_record do |record| - bp.puts record unless options[:no_stream] - - if record.has_key? :SEQ - total += record[:SEQ].length - lengths << record[:SEQ].length +tmpdir = bp.mktmpdir +infile = "#{tmpdir}/in.fna" +outfile = "#{tmpdir}/out.prodigal" + +Fasta.open(infile, mode="w") do |fasta_io| + bp.each_record do |record| + if record.has_key? :SEQ + total += record[:SEQ].length + lengths << record[:SEQ].length + end + + bp.puts record unless options[:no_stream] + fasta_io.puts record end end +if options[:gene_cov] + prodigal = Prodigal.new(infile, outfile, options) + prodigal.run + gene_cov = prodigal.coverage +end + count = 0 n50 = 0 @@ -67,12 +84,17 @@ end bp.out = Stream.write(options[:data_out]) if options[:data_out] new_record = {} -new_record[:N50] = n50 -new_record[:MAX] = lengths.max -new_record[:MIN] = lengths.min -new_record[:MEAN] = (total.to_f / lengths.size.to_f).to_i -new_record[:TOTAL] = total -new_record[:COUNT] = lengths.size +new_record[:N50] = n50 +new_record[:MAX] = lengths.max +new_record[:MIN] = lengths.min +new_record[:MEAN] = (total.to_f / lengths.size.to_f).to_i +new_record[:TOTAL] = total +new_record[:COUNT] = lengths.size + +if options[:gene_cov] + new_record[:GENE_COV] = gene_cov + new_record[:GENE_RATIO] = (gene_cov.to_f / (2 * total.to_f)).round(3) +end bp.puts new_record