#!/usr/bin/env ruby
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
require 'maasha/biopieces'
require 'maasha/seq'
-require 'pp'
casts = []
-casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'type', :short=>'t', :type=>'string', :mandatory=>false, :default=>"dna", :allowed=>"dna,rna,protein", :disallowed=>nil}
-
-bp = Biopieces.new
+casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>false, :default=>4, :allowed=>nil, :disallowed=>'0'}
-options = bp.parse(ARGV, casts)
+options = Biopieces.options_parse(ARGV, casts)
-oligos = Seq.generate_oligos(options[:size], options[:type])
+size = options[:size]
+kmer_hash = Hash.new { 0 }
-bp.each_record do |record|
- if record.has_key? :SEQ
- kmers = {}
- oligos.each { |oligo| kmers[oligo.upcase] = 0 }
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record[:SEQ]
+ seq = record[:SEQ].upcase
- (0 ... record[:SEQ].length - options[:size]).each do |i|
- kmer = record[:SEQ][i .. i + options[:size] - 1].upcase
- kmers[kmer] += 1 if kmers[kmer]
+ (0 .. seq.length - size).each do |i|
+ kmer = seq[i ... i + size]
+ kmer_hash[kmer] += 1
+ end
end
- record.merge! kmers
+ output.puts record
end
- bp.puts record
+ kmer_hash.each do |kmer, count|
+ record = {
+ :REC_TYPE => "KMER_FREQ",
+ :KMER => kmer,
+ :COUNT => count,
+ :FREQ => (count.to_f / kmer_hash.size).round(4)
+ }
+
+ output.puts record
+ end
end
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<