X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fassemble_pairs2;h=66c2a7bd68015572df1cb530746e539005027cce;hb=256548158f770fa6b518a5cc86858655bdb910fe;hp=c7bfe1bc7007dbca8cd24c98d8c434b554585882;hpb=b80cf7b1b2e4f2b1afd992b2821ac071ee0f2c79;p=biopieces.git diff --git a/bp_bin/assemble_pairs2 b/bp_bin/assemble_pairs2 index c7bfe1b..66c2a7b 100755 --- a/bp_bin/assemble_pairs2 +++ b/bp_bin/assemble_pairs2 @@ -28,12 +28,31 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -#require 'profile' require 'maasha/biopieces' require 'maasha/seq' require 'maasha/seq/assemble' require 'pp' +def names_match(entry1, entry2) + if entry1.seq_name =~ /^([^ ]+) \d:/ + name1 = $1 + elsif entry1.seq_name =~ /^(.+)\/\d$/ + name1 = $1 + else + raise "Could not match sequence name: #{entry1.seq_name}" + end + + if entry2.seq_name =~ /^([^ ]+) \d:/ + name2 = $1 + elsif entry2.seq_name =~ /^(.+)\/\d$/ + name2 = $1 + else + raise "Could not match sequence name: #{entry2.seq_name}" + end + + name1 == name2 +end + casts = [] casts << {:long=>'mismatches', :short=>'m', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil} casts << {:long=>'overlap_min', :short=>'o', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"} @@ -51,31 +70,37 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| entry1 = Seq.new_bp(record) elsif entry2.nil? entry2 = Seq.new_bp(record) - entry2.type = :dna - entry2.reverse!.complement! end if entry1 and entry2 and entry1.length >= options[:overlap_min] and entry2.length >= options[:overlap_min] - merged = Assemble.pair( - entry1, - entry2, - mismatches_max:options[:mismatches], - overlap_min:options[:overlap_min], - overlap_max:options[:overlap_max] - ) - - if merged - new_record = merged.to_bp - - if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/ - new_record[:OVERLAP_LEN] = $1 - new_record[:HAMMING_DIST] = $2 - end - output.puts new_record + if names_match(entry1, entry2) + entry2.type = :dna + entry2.reverse!.complement! + + merged = Assemble.pair( + entry1, + entry2, + mismatches_max:options[:mismatches], + overlap_min:options[:overlap_min], + overlap_max:options[:overlap_max] + ) + + if merged + new_record = merged.to_bp + + if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/ + new_record[:OVERLAP_LEN] = $1 + new_record[:HAMMING_DIST] = $2 + end + + output.puts new_record + end + else + raise "name mismatch: #{entry1.seq_name} != #{entry2.seq_name}" end entry1 = nil