require 'maasha/usearch'
casts = []
-casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.97, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'cluster_min', :short=>'c', :type=>'uint', :mandatory=>true, :default=>2, :allowed=>nil, :disallowed=>"0"}
+casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.97, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'cluster_min', :short=>'c', :type=>'uint', :mandatory=>true, :default=>2, :allowed=>nil, :disallowed=>"0"}
+casts << {:long=>'frequency_min', :short=>'f', :type=>'uint', :mandatory=>true, :default=>2, :allowed=>nil, :disallowed=>"0"}
+casts << {:long=>'quality_min', :short=>'q', :type=>'uint', :mandatory=>true, :default=>20, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'consensus_min', :short=>'C', :type=>'float', :mandatory=>true, :default=>0.2, :allowed=>nil, :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
tmpdir = Biopieces.mktmpdir
uc.each_alignment do |align|
if align.members >= options[:cluster_min]
+ align.options = options
+
alignment_to_fastq(align.entries, index)
cons = align.consensus
--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2012 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Duplicate records in the stream using the value to a specified key.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'maasha/biopieces'
+
+casts = []
+casts << {:long=>'key', :short=>'k', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+options = Biopieces.options_parse(ARGV, casts)
+key = options[:key].to_sym
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record.has_key? key
+ (0 ... record[key].to_i).each { output.puts record }
+ else
+ output.puts record
+ end
+ end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+ID: test1
+COUNT: 0
+---
+ID: test2
+COUNT: 1
+---
+ID: test3
+COUNT: 2
+---
+ID: test4
+COUNT: -1
+---
+ID: test5
+COUNT: fisk
+---
+ID: test6
+---
--- /dev/null
+ID: test2
+COUNT: 1
+---
+ID: test3
+COUNT: 2
+---
+ID: test3
+COUNT: 2
+---
+ID: test6
+---
--- /dev/null
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
+run "$bp -I $in -k COUNT -O $tmp"
+assert_no_diff $tmp $out.1
+clean
class AlignError < StandardError; end;
-CONSENSUS = 0.20
+CONS_MIN = 0.20
FREQ_MIN = 2
QUAL_MIN = 20
ALPH_DNA = %w{A T C G}
# Class for aligning sequences.
class Align
- attr_reader :entries
+ attr_accessor :options
+ attr_reader :entries
# Class method to align sequences in a list of Seq objects and
# return these as a new list of Seq objects.
end
# Method to initialize an Align object with a list of aligned Seq objects.
- def initialize(entries)
+ def initialize(entries, options = {})
@entries = entries
+ @options = options
end
# Method that returns the length of the alignment.
# Method to create a consensus sequence from an Align object and
# return a new Seq object with the consensus.
def consensus
- cons = Consensus.new(@entries)
+ cons = Consensus.new(@entries, @options)
cons.to_seq
end
# Method to pretty print an alignment from an Align object.
def to_s
- cons = self.consensus
- cons.mask_seq_soft!(QUAL_MIN) unless cons.qual.nil?
+ qual_min = @options[:quality_min] || QUAL_MIN
+
+ cons = self.consensus
+ cons.mask_seq_soft!(qual_min) unless cons.qual.nil?
max_name = @entries.group_by { |entry| entry.seq_name.length }.max.first
@entries.each do |entry|
- entry.mask_seq_soft!(QUAL_MIN) unless entry.qual.nil?
+ entry.mask_seq_soft!(qual_min) unless entry.qual.nil?
puts entry.seq_name + (" " * (max_name + 3 - entry.seq_name.length )) + entry.seq
end
class Consensus
# Method to initialize a Consensus object given a list of aligned Seq object.
- def initialize(entries)
+ def initialize(entries, options)
@entries = entries
- @cols = entries.first.length
- @rows = entries.size
+ @options = options
+
+ @cons_min = options[:consensus_min] || CONS_MIN
+ @freq_min = options[:frequency_min] || FREQ_MIN
+ @qual_min = options[:quality_min] || QUAL_MIN
+
+ @cols = entries.first.length
+ @rows = entries.size
@has_qual = entries.first.qual.nil? ? false : true
end
def mask_high_qual
- @na_qual > QUAL_MIN
+ @na_qual > @qual_min
end
def mask_conserved_columns
mask_C = @na_seq & BIT_C > 0
mask_G = @na_seq & BIT_G > 0
- mask_A = (mask_A * mask_A.sum(1)).to_f * factor > CONSENSUS
- mask_T = (mask_T * mask_T.sum(1)).to_f * factor > CONSENSUS
- mask_C = (mask_C * mask_C.sum(1)).to_f * factor > CONSENSUS
- mask_G = (mask_G * mask_G.sum(1)).to_f * factor > CONSENSUS
+ mask_A = (mask_A * mask_A.sum(1)).to_f * factor > @cons_min
+ mask_T = (mask_T * mask_T.sum(1)).to_f * factor > @cons_min
+ mask_C = (mask_C * mask_C.sum(1)).to_f * factor > @cons_min
+ mask_G = (mask_G * mask_G.sum(1)).to_f * factor > @cons_min
mask_A | mask_T | mask_C | mask_G
end