X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fkmer_freq;h=cbe99a8d8e87b210e578b19ed55c24c99b8aff4b;hb=48bea5c28b89dc5586d0bddb338ccd6ba23aa1f9;hp=290f07d5b8907b013a246152dac093f3b9005d29;hpb=c4f14c511655d92281b6d70363de57b77a9b6045;p=biopieces.git diff --git a/bp_bin/kmer_freq b/bp_bin/kmer_freq index 290f07d..cbe99a8 100755 --- a/bp_bin/kmer_freq +++ b/bp_bin/kmer_freq @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2011 Martin A. Hansen. +# Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -28,34 +28,41 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - require 'maasha/biopieces' require 'maasha/seq' casts = [] -casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'} -casts << {:long=>'type', :short=>'t', :type=>'string', :mandatory=>false, :default=>"dna", :allowed=>"dna,rna,protein", :disallowed=>nil} +casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'} -options = Biopieces.options_parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) -oligos = Seq.generate_oligos(options[:size], options[:type]) +size = options[:size] +kmer_hash = Hash.new { 0 } Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| - if record.has_key? :SEQ - kmers = {} - oligos.each { |oligo| kmers[oligo.upcase] = 0 } + if record[:SEQ] + seq = record[:SEQ].upcase - (0 ... record[:SEQ].length - options[:size]).each do |i| - kmer = record[:SEQ][i .. i + options[:size] - 1].upcase - kmers[kmer] += 1 if kmers[kmer] + (0 .. seq.length - size).each do |i| + kmer = seq[i ... i + size] + kmer_hash[kmer] += 1 end - - record.merge! kmers end output.puts record end + + kmer_hash.each do |kmer, count| + record = { + :REC_TYPE => "KMER_FREQ", + :KMER => kmer, + :COUNT => count, + :FREQ => (count.to_f / kmer_hash.size).round(4) + } + + output.puts record + end end