]> git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/analyze_assembly
adding bzip2 support in ruby
[biopieces.git] / bp_bin / analyze_assembly
index 610d7e194a1748f985dfeb1f27fea73fe33355c3..614a9e54d8e85bc00e84e2a6deab96049c0ddb5d 100755 (executable)
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
-require 'biopieces'
+require 'maasha/biopieces'
+require 'maasha/fasta'
+require 'maasha/prodigal'
 require 'pp'
 
 casts = []
-casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'data_out',  :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'gene_cov',  :short=>'g', :type=>'flag',   :mandatory=>false, :default=>nil,      :allowed=>nil,           :disallowed=>nil}
+casts << {:long=>'procedure', :short=>'p', :type=>'string', :mandatory=>true,  :default=>'single', :allowed=>'single,meta', :disallowed=>nil}
+casts << {:long=>'no_stream', :short=>'x', :type=>'flag',   :mandatory=>false, :default=>nil,      :allowed=>nil,           :disallowed=>nil}
+casts << {:long=>'data_out',  :short=>'o', :type=>'file',   :mandatory=>false, :default=>nil,      :allowed=>nil,           :disallowed=>nil}
 
-bp = Biopieces.new
+options = Biopieces.options_parse(ARGV, casts)
 
-options = bp.parse(ARGV, casts)
+options[:full] = true;
 
 total   = 0
 lengths = []
-
-bp.each_record do |record|
-  bp.puts record unless options[:no_stream]
-
-  if record.has_key? :SEQ
-    total   += record[:SEQ].length
-    lengths << record[:SEQ].length
+tmpdir  = Biopieces.mktmpdir
+infile  = File.join(tmpdir, "in.fna")
+outfile = File.join(tmpdir, "out.prodigal")
+
+Fasta.open(infile, "w") do |fasta_output|
+  Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+    input.each_record do |record|
+      # Records without :SEQ are passed through but add nothing to the stats or the FASTA file.
+      if record[:SEQ]
+        seq = Seq.new_bp(record)
+        total   += record[:SEQ].length
+        lengths << record[:SEQ].length
+        fasta_output.puts seq.to_fasta # inside the guard: seq is nil when :SEQ is absent
+      end
+
+      output.puts record unless options[:no_stream]
+    end
   end
 end
 
+if options[:gene_cov]
+  prodigal = Prodigal.new(infile, outfile, options)
+  prodigal.run
+  gene_cov = prodigal.coverage
+end
+
 count = 0
 n50   = 0
 
@@ -64,17 +84,19 @@ lengths.sort.reverse.each do |length|
   end
 end
 
-bp.out = Stream.write(options[:data_out]) if options[:data_out]
-
 new_record = {}
-new_record[:N50]   = n50
-new_record[:MAX]   = lengths.max
-new_record[:MIN]   = lengths.min
-new_record[:MEAN]  = (total.to_f / lengths.size.to_f).to_i
-new_record[:TOTAL] = total
-new_record[:COUNT] = lengths.size
-
-bp.puts new_record
+new_record[:N50]        = n50
+new_record[:MAX]        = lengths.max
+new_record[:MIN]        = lengths.min
+new_record[:MEAN]       = lengths.empty? ? 0 : total / lengths.size # guard: 0.0 / 0.0 is NaN and NaN.to_i raises FloatDomainError
+new_record[:TOTAL]      = total
+new_record[:COUNT]      = lengths.size
+new_record[:GENE_COV]   = gene_cov if options[:gene_cov]
+
+Biopieces.open(nil, options[:data_out]) do |input, output|
+  output.puts new_record
+end
+
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<