From: martinahansen Date: Mon, 31 Jan 2011 20:47:49 +0000 (+0000) Subject: added calc_N50 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=8b48f161521c6cfb488a3b982a620394a3dc5d43;p=biopieces.git added calc_N50 git-svn-id: http://biopieces.googlecode.com/svn/trunk@1238 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/calc_N50 b/bp_bin/calc_N50 new file mode 100755 index 0000000..3b7d4e4 --- /dev/null +++ b/bp_bin/calc_N50 @@ -0,0 +1,71 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Calculate n50 for sequences in the stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +require 'biopieces' +require 'pp' + +casts = [] +casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) + +total = 0 +lengths = [] + +bp.each_record do |record| + bp.puts record unless options[:no_stream] + + if record.has_key? :SEQ + total += record[:SEQ].length + lengths << record[:SEQ].length + end +end + +bp.out = Stream.write(options[:data_out]) + +count = 0 + +lengths.sort.reverse.each do |length| + count += length + + if count >= total * 0.50 + bp.puts "N50" => length + break + end +end + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__