]> git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/analyze_assembly
added gene coverage to analyze_assembly
[biopieces.git] / bp_bin / analyze_assembly
index 610d7e194a1748f985dfeb1f27fea73fe33355c3..d6b65a6d7bd4c4fc6e514df2126234724efc1d34 100755 (executable)
 
 
 require 'biopieces'
+require 'fasta'
+require 'prodigal'
 require 'pp'
 
 casts = []
+casts << {:long=>'gene_cov',  :short=>'g', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 casts << {:long=>'data_out',  :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 
@@ -40,18 +43,32 @@ bp = Biopieces.new
 
 options = bp.parse(ARGV, casts)
 
+options[:full] = true;
+
 total   = 0
 lengths = []
-
-bp.each_record do |record|
-  bp.puts record unless options[:no_stream]
-
-  if record.has_key? :SEQ
-    total   += record[:SEQ].length
-    lengths << record[:SEQ].length
+tmpdir  = bp.mktmpdir
+infile  = "#{tmpdir}/in.fna"
+outfile = "#{tmpdir}/out.prodigal"
+
+Fasta.open(infile, mode="w") do |fasta_io|
+  bp.each_record do |record|
+    if record.has_key? :SEQ
+      total   += record[:SEQ].length
+      lengths << record[:SEQ].length
+    end
+
+    bp.puts record unless options[:no_stream]
+    fasta_io.puts record
   end
 end
 
+if options[:gene_cov]
+  prodigal = Prodigal.new(infile, outfile, options)
+  prodigal.run
+  gene_cov = prodigal.coverage
+end
+
 count = 0
 n50   = 0
 
@@ -67,12 +84,17 @@ end
 bp.out = Stream.write(options[:data_out]) if options[:data_out]
 
 new_record = {}
-new_record[:N50]   = n50
-new_record[:MAX]   = lengths.max
-new_record[:MIN]   = lengths.min
-new_record[:MEAN]  = (total.to_f / lengths.size.to_f).to_i
-new_record[:TOTAL] = total
-new_record[:COUNT] = lengths.size
+new_record[:N50]        = n50
+new_record[:MAX]        = lengths.max
+new_record[:MIN]        = lengths.min
+new_record[:MEAN]       = (total.to_f / lengths.size.to_f).to_i
+new_record[:TOTAL]      = total
+new_record[:COUNT]      = lengths.size
+
+if options[:gene_cov]
+  new_record[:GENE_COV]   = gene_cov
+  new_record[:GENE_RATIO] = (gene_cov.to_f / (2 * total.to_f)).round(3)
+end
 
 bp.puts new_record