X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Ffind_homopolymers;h=c7c98f0023e7c738f50f9f301c261ec563ed39eb;hb=04c4ab8721f07155ac6b02d619c9623f82948ec6;hp=3cad86f505fd60a28a251dbe7967561347ed4098;hpb=c4f14c511655d92281b6d70363de57b77a9b6045;p=biopieces.git diff --git a/bp_bin/find_homopolymers b/bp_bin/find_homopolymers index 3cad86f..c7c98f0 100755 --- a/bp_bin/find_homopolymers +++ b/bp_bin/find_homopolymers @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2013 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -31,21 +31,58 @@ require 'maasha/biopieces' require 'maasha/seq' +require 'pp' + +class Seq; include Homopolymer; end casts = [] -casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'limit', :short=>'l', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'longest', :short=>'L', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| - if record.has_key? :SEQ - seq = Seq.new(nil, record[:SEQ]) - - record[:HOMOPOL_MAX] = seq.homopol_max(options[:min]) + if record[:SEQ] + seq = Seq.new(seq: record[:SEQ]) + + longest = Seq::Homopolymer.new("", 0, 0) + got_one = false + count = 0 + + seq.each_homopolymer(options[:min]) do |h| + got_one = true + + record[:HOMOPOL_PAT] = h.pattern + record[:HOMOPOL_LEN] = h.length + record[:HOMOPOL_POS] = h.pos + + if options[:longest] + longest = h.length > longest.length ? h : longest + else + output.puts record + + count += 1 + + break if options[:limit] and options[:limit] == count + end + end + + if options[:longest] + if longest.length > options[:min] + record[:HOMOPOL_PAT] = longest.pattern + record[:HOMOPOL_LEN] = longest.length + record[:HOMOPOL_POS] = longest.pos + end + + output.puts record + elsif not got_one + output.puts record + end + else + output.puts record end - - output.puts record end end