]> git.donarmstrong.com Git - biopieces.git/commitdiff
added assemble_seq_velvet draft biopiece
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 4 May 2011 19:36:35 +0000 (19:36 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 4 May 2011 19:36:35 +0000 (19:36 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1371 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/assemble_seq_idba
bp_bin/assemble_seq_velvet [new file with mode: 0755]

index 1fad90fea89cb6328a92194a10534fd9ec4e3fe0..c1d72a23f778a5aa7d4c54f1642ad0fccba2f294 100755 (executable)
@@ -49,7 +49,7 @@ options = bp.parse(ARGV, casts)
 
 Dir.mkdir(options[:directory]) unless Dir.exists?(options[:directory])
 
-file_fasta = [options[:directory], options[:name] ].join(File::SEPARATOR) + ".fna"
+file_fasta = [options[:directory], options[:name]].join(File::SEPARATOR) + ".fna"
 
 Fasta.open(file_fasta, mode="w") do |fasta_io|
   bp.each_record do |record|
@@ -58,7 +58,7 @@ Fasta.open(file_fasta, mode="w") do |fasta_io|
 end
 
 unless File.size(file_fasta) == 0
-  output = [options[:directory], options[:name] ].join(File::SEPARATOR)
+  output = [options[:directory], options[:name]].join(File::SEPARATOR)
 
   commands = []
   commands << "nice -n 19"
diff --git a/bp_bin/assemble_seq_velvet b/bp_bin/assemble_seq_velvet
new file mode 100755 (executable)
index 0000000..cf6160f
--- /dev/null
@@ -0,0 +1,195 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Assemble sequences in the stream using Velvet.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'biopieces'
+require 'fasta'
+require 'pp'
+
+class Velvet
+  def initialize(directory, sequence_file, verbose)
+    @directory     = directory
+    @sequence_file = sequence_file
+    @verbose       = verbose
+  end
+
+  def run_velveth(kmer_min, kmer_max, type)
+    @kmer_min = kmer_min
+    @kmer_max = kmer_max
+
+    kmer = @kmer_min
+
+    while kmer <= @kmer_max
+      dir_velveth = [@directory, "Velvet_#{kmer}"].join(File::SEPARATOR)
+
+      Dir.mkdir(dir_velveth)
+
+      commands = []
+      commands << "velveth"
+      commands << dir_velveth
+      commands << kmer
+      commands << "-#{type}"
+      commands << "-fasta"
+      commands << @sequence_file
+
+      execute(commands)
+
+      kmer += 2
+    end
+  end
+
+  def run_velvetg(cov_cutoffs, exp_cov)
+    Dir.glob("#{@directory}/Velvet_*").each do |dir_velveth|
+      files_velveth = Dir.glob("#{dir_velveth}/*")
+
+      cov_cutoffs.each do |cov_cutoff|
+        dir_velvetg = [dir_velveth, "Cov_cutoff_#{cov_cutoff}"].join(File::SEPARATOR)
+
+        Dir.mkdir(dir_velvetg)
+        FileUtils.cp_r files_velveth, dir_velvetg
+        
+        commands = []
+        commands << "velvetg"
+        commands << dir_velvetg
+        commands << "-cov_cutoff #{cov_cutoff}"
+        commands << "-exp_cov #{exp_cov}"
+
+        execute(commands)
+      end
+    end
+  end
+
+  def pick_best_assembly
+    list = []
+
+    Dir.glob("#{@directory}/Velvet_*/Cov_cutoff_*/contigs.fa").each do |file|
+      n50 = fasta_n50(file)
+      list << [file, n50]
+    end
+
+    list.sort_by { |e| e.last }.last.first
+  end
+
+  private
+
+  def execute(commands)
+    commands.unshift "nice -n 19"
+    commands.push "> /dev/null 2>&1" unless @verbose
+
+    command = commands.join(" ")
+
+    begin
+      system(command)
+      raise "Command failed: #{command}" unless $?.success?
+    rescue
+      $stderr.puts "Command failed: #{command}"
+    end
+  end
+
+  def fasta_n50(file)
+    total   = 0
+    lengths = []
+
+    Fasta.open(file, mode="r") do |fasta_io|
+      fasta_io.each do |entry|
+        total   += entry.length
+        lengths << entry.length
+      end
+    end
+
+    n50(total, lengths)
+  end
+
+  def n50(total, lengths)
+    count = 0
+    n50   = 0
+
+    lengths.sort.reverse.each do |length|
+      count += length
+
+      if count >= total * 0.50
+        n50 = length
+        break
+      end
+    end
+
+    n50
+  end
+end
+
+types       = 'short,shortPaired,long,longPaired'
+cov_cutoffs = "2,4,8,16"
+
+casts = []
+casts << {:long=>'directory',  :short=>'d', :type=>'dir',    :mandatory=>true,  :default=>nil,         :allowed=>nil,   :disallowed=>nil}
+casts << {:long=>'type',       :short=>'t', :type=>'string', :mandatory=>true,  :default=>'short',     :allowed=>types, :disallowed=>nil}
+casts << {:long=>'kmer_min',   :short=>'k', :type=>'uint',   :mandatory=>true,  :default=>19,          :allowed=>nil,   :disallowed=>nil}
+casts << {:long=>'kmer_max',   :short=>'K', :type=>'uint',   :mandatory=>true,  :default=>31,          :allowed=>nil,   :disallowed=>nil}
+casts << {:long=>'cov_cutoff', :short=>'c', :type=>'list',   :mandatory=>true,  :default=>cov_cutoffs, :allowed=>nil,   :disallowed=>nil}
+casts << {:long=>'exp_cov',    :short=>'e', :type=>'float',  :mandatory=>true,  :default=>'auto',      :allowed=>nil,   :disallowed=>nil}
+casts << {:long=>'clean',      :short=>'x', :type=>'flag',   :mandatory=>false, :default=>nil,         :allowed=>nil,   :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+raise ArgumentError, "kmer_min #{options[:kmer_min]} must be uneven." if options[:kmer_min].even?
+raise ArgumentError, "kmer_max #{options[:kmer_max]} must be uneven." if options[:kmer_max].even?
+raise ArgumentError, "kmer_min >= kmer_max: #{options[:kmer_min]} >= #{options[:kmer_max]}" unless options[:kmer_max] > options[:kmer_min]
+
+Dir.mkdir(options[:directory]) unless Dir.exists?(options[:directory])
+
+file_fasta = [options[:directory], "sequence_in.fna"].join(File::SEPARATOR)
+
+Fasta.open(file_fasta, mode="w") do |fasta_io|
+  bp.each_record do |record|
+    fasta_io.puts record
+  end
+end
+
+unless File.size(file_fasta) == 0
+  velvet = Velvet.new(options[:directory], file_fasta, options[:verbose])
+  velvet.run_velveth(options[:kmer_min], options[:kmer_max], options[:type])
+  velvet.run_velvetg(options[:cov_cutoff], options[:exp_cov])
+  file_contigs = velvet.pick_best_assembly
+
+  Fasta.open(file_contigs, mode="r") do |fasta_io|
+    fasta_io.each do |entry|
+      bp.puts entry.to_bp
+    end
+  end
+end
+
+FileUtils.remove_entry_secure options[:directory] if options[:clean]
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__