From 256548158f770fa6b518a5cc86858655bdb910fe Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 13 Mar 2013 10:20:58 +0000 Subject: [PATCH] added name check to assemble_pairs2 git-svn-id: http://biopieces.googlecode.com/svn/trunk@2141 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/assemble_pairs2 | 64 +++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/bp_bin/assemble_pairs2 b/bp_bin/assemble_pairs2 index 611ca56..66c2a7b 100755 --- a/bp_bin/assemble_pairs2 +++ b/bp_bin/assemble_pairs2 @@ -28,12 +28,31 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -#require 'profile' require 'maasha/biopieces' require 'maasha/seq' require 'maasha/seq/assemble' require 'pp' +def names_match(entry1, entry2) + if entry1.seq_name =~ /^([^ ]+) \d:/ + name1 = $1 + elsif entry1.seq_name =~ /^(.+)\/\d$/ + name1 = $1 + else + raise "Could not match sequence name: #{entry1.seq_name}" + end + + if entry2.seq_name =~ /^([^ ]+) \d:/ + name2 = $1 + elsif entry2.seq_name =~ /^(.+)\/\d$/ + name2 = $1 + else + raise "Could not match sequence name: #{entry2.seq_name}" + end + + name1 == name2 +end + casts = [] casts << {:long=>'mismatches', :short=>'m', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil} casts << {:long=>'overlap_min', :short=>'o', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"} @@ -57,26 +76,31 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| entry2 and entry1.length >= options[:overlap_min] and entry2.length >= options[:overlap_min] - entry2.type = :dna - entry2.reverse!.complement! - - merged = Assemble.pair( - entry1, - entry2, - mismatches_max:options[:mismatches], - overlap_min:options[:overlap_min], - overlap_max:options[:overlap_max] - ) - - if merged - new_record = merged.to_bp - - if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/ - new_record[:OVERLAP_LEN] = $1 - new_record[:HAMMING_DIST] = $2 - end - output.puts new_record + if names_match(entry1, entry2) + entry2.type = :dna + entry2.reverse!.complement! + + merged = Assemble.pair( + entry1, + entry2, + mismatches_max:options[:mismatches], + overlap_min:options[:overlap_min], + overlap_max:options[:overlap_max] + ) + + if merged + new_record = merged.to_bp + + if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/ + new_record[:OVERLAP_LEN] = $1 + new_record[:HAMMING_DIST] = $2 + end + + output.puts new_record + end + else + raise "name mismatch: #{entry1.seq_name} != #{entry2.seq_name}" end entry1 = nil -- 2.39.2