X-Git-Url: https://git.donarmstrong.com/?p=biopieces.git;a=blobdiff_plain;f=bp_bin%2Ffind_homopolymers;h=c0885e3d3bb010a1363af0ff57dd24e8acf819cc;hp=89b8e8c1d03c3360fe1463855c437b31ee9318c8;hb=740804b81930af277b29c64f8b8276daff633f44;hpb=c932def8cbb86723bbce9ca79692ebbed59e9528 diff --git a/bp_bin/find_homopolymers b/bp_bin/find_homopolymers index 89b8e8c..c0885e3 100755 --- a/bp_bin/find_homopolymers +++ b/bp_bin/find_homopolymers @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -31,9 +31,14 @@ require 'maasha/biopieces' require 'maasha/seq' +require 'pp' + +class Seq; include Homopolymer; end casts = [] -casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'limit', :short=>'l', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'longest', :short=>'L', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) @@ -42,10 +47,42 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| if record[:SEQ] seq = Seq.new(nil, record[:SEQ]) - record[:HOMOPOL_MAX] = seq.homopol_max(options[:min]) + longest = Seq::Homopolymer.new("", 0, 0) + got_one = false + count = 0 + + seq.each_homopolymer(options[:min]) do |h| + got_one = true + + record[:HOMOPOL_PAT] = h.pattern + record[:HOMOPOL_LEN] = h.length + record[:HOMOPOL_POS] = h.pos + + if options[:longest] + longest = h.length > longest.length ? h : longest + else + output.puts record + + count += 1 + + break if options[:limit] and options[:limit] == count + end + end + + if options[:longest] + if longest.length > options[:min] + record[:HOMOPOL_PAT] = longest.pattern + record[:HOMOPOL_LEN] = longest.length + record[:HOMOPOL_POS] = longest.pos + end + + output.puts record + elsif not got_one + output.puts record + end + else + output.puts record end - - output.puts record end end