git.donarmstrong.com Git - biopieces.git/commitdiff
added name check to assemble_pairs2
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 13 Mar 2013 10:20:58 +0000 (10:20 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 13 Mar 2013 10:20:58 +0000 (10:20 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2141 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/assemble_pairs2

index 611ca56609c63b236722ded3769725f94675e8e2..66c2a7bd68015572df1cb530746e539005027cce 100755 (executable)
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-#require 'profile'
 require 'maasha/biopieces'
 require 'maasha/seq'
 require 'maasha/seq/assemble'
 require 'pp'
 
+def names_match(entry1, entry2)
+  if entry1.seq_name =~ /^([^ ]+) \d:/
+    name1 = $1
+  elsif entry1.seq_name =~ /^(.+)\/\d$/
+    name1 = $1
+  else
+    raise "Could not match sequence name: #{entry1.seq_name}"
+  end
+
+  if entry2.seq_name =~ /^([^ ]+) \d:/
+    name2 = $1
+  elsif entry2.seq_name =~ /^(.+)\/\d$/
+    name2 = $1
+  else
+    raise "Could not match sequence name: #{entry2.seq_name}"
+  end
+
+  name1 == name2
+end
+
 casts = []
 casts << {:long=>'mismatches',  :short=>'m', :type=>'uint', :mandatory=>false, :default=>5,   :allowed=>nil, :disallowed=>nil}
 casts << {:long=>'overlap_min', :short=>'o', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"}
@@ -57,26 +76,31 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
          entry2 and
          entry1.length >= options[:overlap_min] and
          entry2.length >= options[:overlap_min]
-        entry2.type = :dna
-        entry2.reverse!.complement!
-
-        merged = Assemble.pair(
-          entry1,
-          entry2,
-          mismatches_max:options[:mismatches],
-          overlap_min:options[:overlap_min],
-          overlap_max:options[:overlap_max]
-        )
-
-        if merged
-          new_record = merged.to_bp
-
-          if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/
-            new_record[:OVERLAP_LEN]  = $1
-            new_record[:HAMMING_DIST] = $2
-          end
 
-          output.puts new_record
+        if names_match(entry1, entry2)
+          entry2.type = :dna
+          entry2.reverse!.complement!
+
+          merged = Assemble.pair(
+            entry1,
+            entry2,
+            mismatches_max:options[:mismatches],
+            overlap_min:options[:overlap_min],
+            overlap_max:options[:overlap_max]
+          )
+
+          if merged
+            new_record = merged.to_bp
+
+            if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/
+              new_record[:OVERLAP_LEN]  = $1
+              new_record[:HAMMING_DIST] = $2
+            end
+
+            output.puts new_record
+          end
+        else
+          raise "name mismatch: #{entry1.seq_name} != #{entry2.seq_name}"
         end
 
         entry1 = nil