From 773dcafe5d5199784cbbb44eff03d3fe10a13cb0 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 21 Jan 2013 13:37:11 +0000 Subject: [PATCH] added assemble_pairs biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@2081 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/assemble_pairs | 117 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100755 bp_bin/assemble_pairs diff --git a/bp_bin/assemble_pairs b/bp_bin/assemble_pairs new file mode 100755 index 0000000..440810b --- /dev/null +++ b/bp_bin/assemble_pairs @@ -0,0 +1,117 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2013 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Assemble ordered overlapping pair-end sequences in the stream. 
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

require 'maasha/biopieces'
require 'maasha/fastq'
require 'pp'

# Command line options: --overlap/-o is a non-zero unsigned integer
# (default 1) that is handed to pandaseq via its -o switch.
casts = []
casts << {:long=>'overlap', :short=>'o', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"}

options = Biopieces.options_parse(ARGV, casts)

# Scratch files: the two read files handed to pandaseq (-f / -r) and the
# file its stdout is redirected to.
tmpdir   = Biopieces.mktmpdir
file_in1 = File.join(tmpdir, "in1.fq")
file_in2 = File.join(tmpdir, "in2.fq")
file_out = File.join(tmpdir, "out.fq")

io_in1 = Fastq.open(file_in1, 'w')
io_in2 = Fastq.open(file_in2, 'w')

# Most recently seen read 1 / read 2 (base name and entry). A pair is
# written out as soon as both slots carry the same base name.
name1  = nil
name2  = nil
entry1 = nil
entry2 = nil

Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
  input.each_record do |record|
    # Only records holding a name, a sequence and scores are considered;
    # everything else is dropped from the stream.
    next unless record[:SEQ_NAME] && record[:SEQ] && record[:SCORES]

    entry = Seq.new_bp(record)

    # Split the sequence name into the base name shared by both mates and
    # the read number.
    case entry.seq_name
    when /^(.+)\/(\d)$/   # Illumina 1.5
      name = $1
      pair = $2.to_i
    when /^(.+) (\d):/    # Illumina 1.8
      name = $1
      pair = $2.to_i
    else
      $stderr.puts "WARNING: Unmatched sequence name: #{entry.seq_name}"
      # Without a base name and read number the record cannot be paired,
      # so skip it instead of clobbering the read 2 slot with nil.
      next
    end

    # Any read number other than 1 is treated as the second mate.
    if pair == 1
      name1  = name
      entry1 = entry
    else
      name2  = name
      entry2 = entry
    end

    if name1 && name1 == name2
      io_in1.puts entry1.to_fastq
      io_in2.puts entry2.to_fastq
    end
  end

  # Close the read files before pandaseq reads them.
  io_in1.close
  io_in2.close

  # NOTE(review): flag meanings below are taken from the pandaseq manual,
  # not from this file - confirm against the installed version.
  cmd = "pandaseq"
  cmd << " -6"                      # presumably: PHRED+64 encoded input
  cmd << " -F"                      # presumably: FASTQ output
  cmd << " -B"                      # presumably: allow unbarcoded sequences
  cmd << " -N"                      # presumably: discard sequences with Ns
  cmd << " -o #{options[:overlap]}"
  cmd << " -f #{file_in1}"
  cmd << " -r #{file_in2}"
  cmd << " > #{file_out}"
  cmd << " 2> /dev/null" unless options[:verbose]

  $stderr.puts cmd if options[:verbose]

  system(cmd)

  raise RuntimeError, "Error: command failed: #{cmd}" unless $?.success?

  # Emit every assembled sequence to the output stream as a Biopieces
  # record after converting its scores from 'illumina18' to 'illumina15'.
  Fastq.open(file_out) do |ios|
    ios.each do |entry|
      output.puts entry.convert_scores!('illumina18', 'illumina15').to_bp
    end
  end
end


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


__END__
-- 2.39.2