X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Ffind_homopolymers;h=c0885e3d3bb010a1363af0ff57dd24e8acf819cc;hb=2f0fd91b461033529a4a72e161bd133252a22eb6;hp=5c9cd245dbcef4ba3373a512783b7b0756502510;hpb=494dc53ebd515b1e3e9b91bbebf43059899ca4ce;p=biopieces.git diff --git a/bp_bin/find_homopolymers b/bp_bin/find_homopolymers index 5c9cd24..c0885e3 100755 --- a/bp_bin/find_homopolymers +++ b/bp_bin/find_homopolymers @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -33,21 +33,57 @@ require 'maasha/biopieces' require 'maasha/seq' require 'pp' -casts = [] -casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} - -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +class Seq; include Homopolymer; end -bp.each_record do |record| - if record.has_key? :SEQ - seq = Seq.new(nil, record[:SEQ]) - - record[:HOMOPOL_MAX] = seq.homopol_max(options[:min]) +casts = [] +casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'limit', :short=>'l', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'longest', :short=>'L', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + +options = Biopieces.options_parse(ARGV, casts) + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] + seq = Seq.new(nil, record[:SEQ]) + + longest = Seq::Homopolymer.new("", 0, 0) + got_one = false + count = 0 + + seq.each_homopolymer(options[:min]) do |h| + got_one = true + + record[:HOMOPOL_PAT] = h.pattern + record[:HOMOPOL_LEN] = h.length + record[:HOMOPOL_POS] = h.pos + + if options[:longest] + longest = h.length > longest.length ? h : longest + else + output.puts record + + count += 1 + + break if options[:limit] and options[:limit] == count + end + end + + if options[:longest] + if longest.length > options[:min] + record[:HOMOPOL_PAT] = longest.pattern + record[:HOMOPOL_LEN] = longest.length + record[:HOMOPOL_POS] = longest.pos + end + + output.puts record + elsif not got_one + output.puts record + end + else + output.puts record + end end - - bp.puts record end