def run_velveth(kmer_min, kmer_max, kmer_step, type)
commands = []
commands << "velveth"
- commands << @directory
- commands << "#{kmer_min},#{kmer_max},#{kmer_step}"
+ commands << @directory + File::SEPARATOR
+ commands << "#{kmer_min},#{kmer_max + kmer_step},#{kmer_step}" # FIXME - ugly fix for velvet bug
+ commands << "-create_binary"
commands << "-#{type}"
commands << "-fasta"
commands << @sequence_file
execute(commands)
end
- def run_velvetg(cov_cutoff, exp_cov, paired)
- commands = []
- commands << "velvetg"
- commands << @directory
- commands << "-cov_cutoff #{cov_cutoff}"
- commands << "-exp_cov #{exp_cov}"
- commands << "-ins_length #{ins_length}" if paired
- commands << "-clean yes"
+ def run_velvetg(ins_length, paired)
+ dirs = Dir.entries(@directory).select { |d| d.match(/^_/) }
- execute(commands)
+ dirs.each do |dir|
+ commands = []
+ commands << "velvetg"
+ commands << File.join(@directory, dir)
+ commands << "-cov_cutoff auto"
+ commands << "-exp_cov auto"
+ commands << "-ins_length #{ins_length}" if paired
+ commands << "-clean yes"
+ commands << "-min_contig_lgth 200"
+
+ execute(commands)
+ end
+ end
+
+ def pick_best_assembly
+ list = []
+
+ Dir.glob("#{@directory}/_*/contigs.fa").each do |file|
+ n50 = fasta_n50(file)
+ list << [file, n50]
+ end
+
+ list.sort_by { |e| e.last }.last.first
end
private
command = commands.join(" ")
begin
+ $stderr.puts command if @verbose
system(command)
raise "Command failed: #{command}" unless $?.success?
rescue
$stderr.puts "Command failed: #{command}"
end
end
+
+ def fasta_n50(file)
+ total = 0
+ lengths = []
+ count = 0
+ n50 = 0
+
+ Fasta.open(file, mode="r") do |fasta_io|
+ fasta_io.each do |entry|
+ total += entry.length
+ lengths << entry.length
+ end
+ end
+
+ lengths.sort.reverse.each do |length|
+ count += length
+
+ if count >= total * 0.50
+ n50 = length
+ break
+ end
+ end
+
+ n50
+ end
end
types = 'short,shortPaired,long,longPaired'
casts << {:long=>'kmer_min', :short=>'k', :type=>'uint', :mandatory=>true, :default=>19, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'kmer_max', :short=>'K', :type=>'uint', :mandatory=>true, :default=>31, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'kmer_step', :short=>'s', :type=>'uint', :mandatory=>true, :default=>2, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'cov_cutoff', :short=>'c', :type=>'float', :mandatory=>true, :default=>'auto', :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'exp_cov', :short=>'e', :type=>'float', :mandatory=>true, :default=>'auto', :allowed=>nil, :disallowed=>nil}
casts << {:long=>'ins_length', :short=>'i', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'clean', :short=>'X', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
unless File.size(file_fasta) == 0
velvet = Velvet.new(options[:directory], file_fasta, options[:verbose])
velvet.run_velveth(options[:kmer_min], options[:kmer_max], options[:kmer_step], options[:type])
- velvet.run_velvetg(options[:cov_cutoff], options[:exp_cov], options[:type].match("Paired"))
- file_contigs = File.join(options[:directory], "contigs.fa")
+ velvet.run_velvetg(options[:ins_length], options[:type].match("Paired"))
+ file_contigs = velvet.pick_best_assembly
+
+ kmer = file_contigs.match(/_\d+/)
Fasta.open(file_contigs, mode="r") do |fasta_io|
- fasta_io.each do |entry|
- output.puts entry.to_bp
- end
+ fasta_io.each do |entry|
+ entry.seq_name << "_kmer#{kmer}"
+ output.puts entry.to_bp
+ end
end
- end
+ end
end
FileUtils.remove_entry_secure file_fasta
FileUtils.remove_entry_secure options[:directory] if options[:clean]
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<