X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fkmer_freq;h=cbe99a8d8e87b210e578b19ed55c24c99b8aff4b;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=733eed3bb186d0a6d78eaf54ed562833e29e7361;hpb=45d8f994932171f95201decf1e74f9ad1dac5a05;p=biopieces.git diff --git a/bp_bin/kmer_freq b/bp_bin/kmer_freq index 733eed3..cbe99a8 100755 --- a/bp_bin/kmer_freq +++ b/bp_bin/kmer_freq @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2011 Martin A. Hansen. +# Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -28,37 +28,44 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -require 'biopieces' -require 'seq' -require 'pp' +require 'maasha/biopieces' +require 'maasha/seq' casts = [] -casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'} -casts << {:long=>'type', :short=>'t', :type=>'string', :mandatory=>false, :default=>"dna", :allowed=>"dna,rna,protein", :disallowed=>nil} - -bp = Biopieces.new +casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'} -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) -oligos = Seq.generate_oligos(options[:size], options[:type]) +size = options[:size] +kmer_hash = Hash.new { 0 } -bp.each_record do |record| - if record.has_key? :SEQ - kmers = {} - oligos.each { |oligo| kmers[oligo.upcase] = 0 } +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] + seq = record[:SEQ].upcase - (0 ... record[:SEQ].length - options[:size]).each do |i| - kmer = record[:SEQ][i .. i + options[:size] - 1].upcase - kmers[kmer] += 1 if kmers[kmer] + (0 .. seq.length - size).each do |i| + kmer = seq[i ... i + size] + kmer_hash[kmer] += 1 + end end - record.merge! kmers + output.puts record end - bp.puts record + kmer_hash.each do |kmer, count| + record = { + :REC_TYPE => "KMER_FREQ", + :KMER => kmer, + :COUNT => count, + :FREQ => (count.to_f / kmer_hash.size).round(4) + } + + output.puts record + end end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<