#!/usr/bin/env ruby
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Order records so SEQ_NAME/1 and SEQ_NAME/2 are in order.
+# Order records with paired-end sequence data.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
require 'maasha/biopieces'
require 'pp'
# Main script body: read records from the input stream and write them to the
# output stream so that the two members of a pair (pair number 1 and 2 sharing
# the same name) are emitted next to each other.
#
# NOTE(review): this block is a diff fragment. Lines prefixed with "-" belong to
# the removed (old) version, lines prefixed with "+" to the added (new) version,
# and unprefixed lines are shared context. `options`, `hash1` and `hash2` are not
# defined in the visible lines; presumably they are set up in elided context --
# confirm against the full file.
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
# Only records carrying a SEQ_NAME are handled; no visible branch deals with
# records that lack one.
if record[:SEQ_NAME]
# Old version: fixed-position parsing -- the name is SEQ_NAME without its last
# two characters and the pair number is taken from the last character.
- name = record[:SEQ_NAME][0 ... -2].to_sym
- pair = record[:SEQ_NAME][-1].to_i
# New version: regex-based parsing of two name layouts. The first pattern is
# "<name>/<digit>" at the end of the line; the second is "<name> <digit>:".
+ case record[:SEQ_NAME]
+ when /^(.+)\/(\d)$/ # Illumina 1.5
+ name = $1.to_sym
+ pair = $2.to_i
+ when /^(.+) (\d):/ # Illumina 1.8
+ name = $1.to_sym
+ pair = $2.to_i
+ else
# Unlike the old "Badly formatted name" warning, this one is not gated on
# options[:verbose].
# NOTE(review): `name` and `pair` are not assigned on this path, so neither
# pair branch below should match and the record appears to be dropped rather
# than buffered or written -- confirm this is intended.
+ $stderr.puts "WARNING: Unmatched sequence name: #{record[:SEQ_NAME]}"
+ end
if pair == 1
# Partner with pair number 2 already buffered: (new version) tag both records
# as "paired", then write this record followed by the buffered partner.
- if hash2.has_key? name
+ if hash2[name]
+ record[:ORDER] = "paired"
+ hash2[name][:ORDER] = "paired"
output.puts record
output.puts hash2[name]
# NOTE(review): by symmetry with the pair == 2 branch, an `else` line looks to
# be missing from this fragment just above the assignment below -- confirm.
hash1[name] = record
end
elsif pair == 2
# Mirror case: write the buffered pair-1 record first, then this record;
# otherwise buffer this record in hash2 until its partner shows up.
- if hash1.has_key? name
+ if hash1[name]
+ hash1[name][:ORDER] = "paired"
+ record[:ORDER] = "paired"
output.puts hash1[name]
output.puts record
else
hash2[name] = record
end
# Old version only: warn (when verbose) about a pair number other than 1 or 2.
- else
- $stderr.puts "WARNING: Badly formatted name: #{record[:SEQ_NAME]}" if options[:verbose]
end
end
end
# Old version: close the stream block here and, when verbose, report the sizes
# of the two buffers on stderr.
-end
-if options[:verbose]
- $stderr.puts "Unmatched names with /1: #{hash1.size}"
- $stderr.puts "Unmatched names with /2: #{hash2.size}"
# New version: after all input is read, tag every record still held in hash1 as
# "orphan 1" and every record still held in hash2 as "orphan 2", and write them.
# NOTE(review): no visible line removes an entry from hash1/hash2 once it has
# been written as "paired", so such entries would still be present here and be
# re-tagged and written a second time -- verify against the full file.
+ hash1.each_value do |record|
+ record[:ORDER] = "orphan 1"
+ output.puts record
+ end
+
+ hash2.each_value do |record|
+ record[:ORDER] = "orphan 2"
+ output.puts record
+ end
end