X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fshred_seq;h=322916426e606dd11259eb6f34757486296e039d;hb=a30677c14f1738f6a76e8c12f2e732cdef9958d6;hp=a59b688349e56bc76de5af42e6f401a66470b620;hpb=d8fb5588758b9d40e562ee66519ee6772ad511a9;p=biopieces.git diff --git a/bp_bin/shred_seq b/bp_bin/shred_seq index a59b688..3229164 100755 --- a/bp_bin/shred_seq +++ b/bp_bin/shred_seq @@ -29,8 +29,8 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -require 'biopieces' -require 'seq' +require 'maasha/biopieces' +require 'maasha/seq' require 'pp' class Seq @@ -42,18 +42,16 @@ class Seq # seq.shred { |subseq| } -> Seq # seq.shred -> [] def shred(size, max_cov) + raise SeqError, "Size: #{size} < 1" if size < 1 + raise SeqError, "Max coverage: #{max_cov} < 1" if max_cov < 1 + entries = [] sum = 0 strand = '+' - while (sum / self.seq.length) < max_cov - start = rand(self.seq.size - size) - stop = start + size - 1 - seq_name = self.seq_name + "[#{start + 1}-#{stop + 1}:#{strand}]" - seq = self.seq[start .. stop] - entry = Seq.new(seq_name, seq, 'dna') - - entry.revcomp if strand == '-' + while coverage(sum) < max_cov + entry = self.subseq_rand(size) + entry.reverse!.complement! if strand == '-' if block_given? yield entry @@ -68,26 +66,35 @@ class Seq entries end + + private + + # Method that returns the coverage of subsequences. + def coverage(sum) + sum / self.seq.length + end end casts = [] casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>true, :default=>500, :allowed=>nil, :disallowed=>'0'} casts << {:long=>'coverage', :short=>'c', :type=>'uint', :mandatory=>true, :default=>100, :allowed=>nil, :disallowed=>'0'} -bp = Biopieces.new - -options = bp.parse(ARGV, casts) +options = Biopieces.options_parse(ARGV, casts) -bp.each_record do |record| - if record.has_key? :SEQ - entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES]) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] and record[:SEQ].length >= options[:size] + entry = Seq.new_bp(record) + entry.type = :dna - entry.shred(options[:size], options[:coverage]) do |subentry| - bp.puts subentry.to_bp + entry.shred(options[:size], options[:coverage]) do |subentry| + output.puts subentry.to_bp + end end end end + # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<