]> git.donarmstrong.com Git - biopieces.git/commitdiff
fixed up assemble_seq_velvet
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 9 Jan 2013 11:20:20 +0000 (11:20 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 9 Jan 2013 11:20:20 +0000 (11:20 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2067 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/assemble_seq_velvet
bp_bin/biostat

index 3b023b213caca2664ce071c075af00a396ba7f6e..d6106fc0f3fbe7c996282437d5c4c6d6d15ea628 100755 (executable)
@@ -41,8 +41,9 @@ class Velvet
   def run_velveth(kmer_min, kmer_max, kmer_step, type)
     commands = []
     commands << "velveth"
-    commands << @directory
-    commands << "#{kmer_min},#{kmer_max},#{kmer_step}"
+    commands << @directory + File::SEPARATOR
+    commands << "#{kmer_min},#{kmer_max + kmer_step},#{kmer_step}"   # FIXME - ugly fix for velvet bug
+    commands << "-create_binary"
     commands << "-#{type}"
     commands << "-fasta"
     commands << @sequence_file
@@ -50,16 +51,32 @@ class Velvet
     execute(commands)
   end
 
-  def run_velvetg(cov_cutoff, exp_cov, paired)
-    commands = []
-    commands << "velvetg"
-    commands << @directory
-    commands << "-cov_cutoff #{cov_cutoff}"
-    commands << "-exp_cov #{exp_cov}"
-    commands << "-ins_length #{ins_length}" if paired
-    commands << "-clean yes"
+  def run_velvetg(ins_length, paired)
+    dirs = Dir.entries(@directory).select { |d| d.match(/^_/) }
 
-    execute(commands)
+    dirs.each do |dir|
+      commands = []
+      commands << "velvetg"
+      commands << File.join(@directory, dir)
+      commands << "-cov_cutoff auto"
+      commands << "-exp_cov auto"
+      commands << "-ins_length #{ins_length}" if paired
+      commands << "-clean yes"
+      commands << "-min_contig_lgth 200"
+
+      execute(commands)
+    end
+  end
+
+  def pick_best_assembly
+    list = []
+
+    Dir.glob("#{@directory}/_*/contigs.fa").each do |file|
+      n50 = fasta_n50(file)
+      list << [file, n50]
+    end
+
+    list.sort_by { |e| e.last }.last.first
   end
 
   private
@@ -70,12 +87,38 @@ class Velvet
     command = commands.join(" ")
 
     begin
+      $stderr.puts command if @verbose
       system(command)
       raise "Command failed: #{command}" unless $?.success?
     rescue
       $stderr.puts "Command failed: #{command}"
     end
   end
+
+  def fasta_n50(file)
+    total   = 0
+    lengths = []
+    count   = 0
+    n50     = 0
+
+    Fasta.open(file, mode="r") do |fasta_io|
+      fasta_io.each do |entry|
+        total   += entry.length
+        lengths << entry.length
+      end
+    end
+
+    lengths.sort.reverse.each do |length|
+      count += length
+
+      if count >= total * 0.50
+        n50 = length
+        break
+      end
+    end
+
+    n50
+  end
 end
 
 types = 'short,shortPaired,long,longPaired'
@@ -86,8 +129,6 @@ casts << {:long=>'type',       :short=>'t', :type=>'string', :mandatory=>true,
 casts << {:long=>'kmer_min',   :short=>'k', :type=>'uint',   :mandatory=>true,  :default=>19,      :allowed=>nil,   :disallowed=>nil}
 casts << {:long=>'kmer_max',   :short=>'K', :type=>'uint',   :mandatory=>true,  :default=>31,      :allowed=>nil,   :disallowed=>nil}
 casts << {:long=>'kmer_step',  :short=>'s', :type=>'uint',   :mandatory=>true,  :default=>2,       :allowed=>nil,   :disallowed=>nil}
-casts << {:long=>'cov_cutoff', :short=>'c', :type=>'float',  :mandatory=>true,  :default=>'auto',  :allowed=>nil,   :disallowed=>nil}
-casts << {:long=>'exp_cov',    :short=>'e', :type=>'float',  :mandatory=>true,  :default=>'auto',  :allowed=>nil,   :disallowed=>nil}
 casts << {:long=>'ins_length', :short=>'i', :type=>'uint',   :mandatory=>false, :default=>nil,     :allowed=>nil,   :disallowed=>nil}
 casts << {:long=>'clean',      :short=>'X', :type=>'flag',   :mandatory=>false, :default=>nil,     :allowed=>nil,   :disallowed=>nil}
 
@@ -115,21 +156,20 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
        unless File.size(file_fasta) == 0
                velvet = Velvet.new(options[:directory], file_fasta, options[:verbose])
                velvet.run_velveth(options[:kmer_min], options[:kmer_max], options[:kmer_step], options[:type])
-               velvet.run_velvetg(options[:cov_cutoff], options[:exp_cov], options[:type].match("Paired"))
-               file_contigs = File.join(options[:directory], "contigs.fa")
+               velvet.run_velvetg(options[:ins_length], options[:type].match("Paired"))
+               file_contigs = velvet.pick_best_assembly
 
                Fasta.open(file_contigs, mode="r") do |fasta_io|
-                       fasta_io.each do |entry|
-                               output.puts entry.to_bp
-                       end
+                 fasta_io.each do |entry|
+                         output.puts entry.to_bp
+           end
                end
-       end
+       end
 end
 
 FileUtils.remove_entry_secure file_fasta
 FileUtils.remove_entry_secure options[:directory] if options[:clean]
 
-
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
index 175d6fd5c1d9b75e6dedab1cffed0969ebfba0ff..c0b7c13971ba1c938dcc11650918954dd8a5eef5 100755 (executable)
@@ -163,10 +163,13 @@ sub get_status_info
 
     my ( $fh, $line, $time, $args, $tmp_dir, $du );
 
-    $fh = Maasha::Filesys::file_read_open( $file );
-    flock($fh, 1);
-    $line = <$fh>;
-    close $fh;
+    if (-e $file)
+    {
+        $fh = Maasha::Filesys::file_read_open( $file );
+        flock($fh, 1);
+        $line = <$fh>;
+        close $fh;
+    }
 
     chomp $line;