From a26f6e31342b6cdec858215423389f49ee1b7655 Mon Sep 17 00:00:00 2001
From: martinahansen
Date: Thu, 26 May 2011 12:53:53 +0000
Subject: [PATCH] fixed mate-pair name issue in assemble_seq_idba

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1433 74ccb610-7750-0410-82ae-013aeee3265d
---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

* Purpose of the change, as visible in the hunks below: when --scaffold is
  set, every record written to the IDBA input FASTA gets its SEQ_NAME
  rewritten to "read<count>/1" or "read<count>/2", and the contigs are read
  back from "IDBA-contig-mate.fa" instead of "IDBA-contig.fa".
* Fix: the added name check read `records[:SEQ_NAME]`, but the block
  parameter is `record`; `records` is not defined in the visible code, so the
  line would raise NameError as soon as --scaffold was used. It now reads
  `record[:SEQ_NAME]`. The blob id on the "index" line predates this fix.
* `count` is only incremented in the else branch, i.e. after a record whose
  name does not end in "1"; this presumably assumes mates arrive interleaved
  as /1 then /2 - TODO confirm against the callers of this script.
* The line structure of this patch was restored from a copy whose newlines
  had been collapsed. Leading indentation of the Ruby lines was reconstructed
  with the usual 2-space convention and the column alignment of the `casts`
  lines could not be recovered - verify against the target file before
  applying.

 bp_bin/assemble_seq_idba | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/bp_bin/assemble_seq_idba b/bp_bin/assemble_seq_idba
index a8b6c25..cdb6d99 100755
--- a/bp_bin/assemble_seq_idba
+++ b/bp_bin/assemble_seq_idba
@@ -32,15 +32,15 @@ require 'maasha/biopieces'
 require 'maasha/fasta'
 
 casts = []
-casts << {:long=>'directory', :short=>'d', :type=>'dir', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'scaffold', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'kmer_min', :short=>'k', :type=>'uint', :mandatory=>false, :default=>25, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'kmer_max', :short=>'K', :type=>'uint', :mandatory=>false, :default=>50, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'count_min', :short=>'c', :type=>'uint', :mandatory=>false, :default=>2, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'cover', :short=>'C', :type=>'uint', :mandatory=>false, :default=>0, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'pairs_min', :short=>'p', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'prefix_len', :short=>'P', :type=>'uint', :mandatory=>false, :default=>3, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'clean', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'directory', :short=>'d', :type=>'dir', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'scaffold', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'kmer_min', :short=>'k', :type=>'uint', :mandatory=>false, :default=>25, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'kmer_max', :short=>'K', :type=>'uint', :mandatory=>false, :default=>50, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'count_min', :short=>'c', :type=>'uint', :mandatory=>false, :default=>2, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'cover', :short=>'C', :type=>'uint', :mandatory=>false, :default=>0, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'pairs_min', :short=>'p', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'prefix_len', :short=>'P', :type=>'uint', :mandatory=>false, :default=>3, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'clean', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 
 options = Biopieces.options_parse(ARGV, casts)
 
@@ -48,9 +48,21 @@ Dir.mkdir(options[:directory]) unless Dir.exists?(options[:directory])
 
 file_fasta = File.join(options[:directory], "IDBA") + ".fna"
 
+count = 0
+
 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
   Fasta.open(file_fasta, mode="w") do |fasta_io|
     input.each_record do |record|
+      if options[:scaffold] # we need to fix the sequence name for mate-pair IDBA
+        if record[:SEQ_NAME] =~ /1$/
+          record[:SEQ_NAME] = "read#{count}/1"
+        else
+          record[:SEQ_NAME] = "read#{count}/2"
+
+          count += 1
+        end
+      end
+
       fasta_io.puts record
     end
   end
@@ -81,7 +93,11 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
     $stderr.puts "Command failed: #{command}"
   end
 
-  file_contig = File.join(options[:directory], "IDBA") + "-contig.fa"
+  if options[:scaffold]
+    file_contig = File.join(options[:directory], "IDBA") + "-contig-mate.fa"
+  else
+    file_contig = File.join(options[:directory], "IDBA") + "-contig.fa"
+  end
 
   Fasta.open(file_contig, mode="r") do |fasta_io|
     fasta_io.each do |entry|
-- 
2.39.5