]> git.donarmstrong.com Git - biopieces.git/commitdiff
added order_pairs2
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 31 Oct 2012 20:50:00 +0000 (20:50 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 31 Oct 2012 20:50:00 +0000 (20:50 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1976 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/order_pairs2 [new file with mode: 0755]

diff --git a/bp_bin/order_pairs2 b/bp_bin/order_pairs2
new file mode 100755 (executable)
index 0000000..3485460
--- /dev/null
@@ -0,0 +1,105 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2012 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Order records with paired-end sequence data.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'maasha/biopieces'
+require 'dbm'
+require 'pp'
+
+ILLUMINA15 = 15
+ILLUMINA18 = 18
+
+options = Biopieces.options_parse(ARGV)
+
+tmpdir  = Biopieces.mktmpdir
+tmpdir  = "Sletmig"
+tmpfile = File.join(tmpdir, "data")
+
+db = DBM.new(tmpfile, 666, DBM::NEWDB)
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    if record[:SEQ_NAME]
+      db[record[:SEQ_NAME]] = Marshal.dump(record)
+    else
+      output.puts record
+    end
+  end
+
+  skip = {}
+
+  db.keys.each do |seq_name|
+    next if skip[seq_name]
+
+    case seq_name
+    when /^(.+)\/(\d)$/   # Illumina 1.5
+      type = ILLUMINA15
+      name = $1
+      pair = $2.to_i
+    when /^(.+) (\d):/    # Illumina 1.8
+      type = ILLUMINA18
+      name = $1.to_sym
+      pair = $2.to_i
+    else
+      $stderr.puts "WARNING: Unmatched sequence name: #{record[:SEQ_NAME]}"
+    end
+
+    pair2 = (pair == 1) ? 2 : 1
+
+    if type == ILLUMINA15
+      seq_name2 = "#{name}/#{pair2}"
+    else
+      seq_name2 = seq_name.sub(/^(.+) \d:/, "#{$1} #{pair2}:")
+    end
+
+    record1 = Marshal.load(db[seq_name])
+    record2 = db[seq_name2]
+    record2 = Marshal.load(record2) unless record2.nil?
+
+    if record2.nil?
+      record1[:ORDER] = "orphan #{pair}"
+      output.puts record1
+    else
+      record1[:ORDER] = "paired"
+      record2[:ORDER] = "paired"
+
+      output.puts record1
+      output.puts record2
+
+      skip[seq_name2] = true
+    end
+  end
+end
+
+File.delete(tmpfile + ".db")
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__