]> git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/shred_seq
refactoring of assemble_pairs
[biopieces.git] / bp_bin / shred_seq
index 5b7f7ff7760b42bee078ee0c51abc86a770d65a4..322916426e606dd11259eb6f34757486296e039d 100755 (executable)
@@ -29,8 +29,8 @@
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
-require 'biopieces'
-require 'seq'
+require 'maasha/biopieces'
+require 'maasha/seq'
 require 'pp'
 
 class Seq
@@ -42,23 +42,16 @@ class Seq
   # seq.shred { |subseq| } -> Seq
   # seq.shred              -> []
   def shred(size, max_cov)
+    raise SeqError, "Size: #{size} < 1"            if size    < 1
+    raise SeqError, "Max coverage: #{max_cov} < 1" if max_cov < 1
+
     entries = []
     sum     = 0
     strand  = '+'
 
-    while (sum / self.seq.length) < max_cov
-      if self.seq.size - size == 0
-        start = 0
-      else
-        start = rand(self.seq.size - size)
-      end
-
-      stop     = start + size - 1
-      seq_name = self.seq_name + "[#{start + 1}-#{stop + 1}:#{strand}]"
-      seq      = self.seq[start .. stop]
-      entry    = Seq.new(seq_name, seq, 'dna')
-
-      entry.revcomp if strand == '-'
+    while coverage(sum) < max_cov
+      entry = self.subseq_rand(size)
+      entry.reverse!.complement! if strand == '-'
 
       if block_given?
         yield entry
@@ -73,26 +66,35 @@ class Seq
 
     entries
   end
+
+  private
+
+  # Method that returns the coverage of subsequences.
+  def coverage(sum)
+    sum / self.seq.length
+  end
 end
 
 casts = []
 casts << {:long=>'size',     :short=>'s', :type=>'uint', :mandatory=>true, :default=>500, :allowed=>nil, :disallowed=>'0'}
 casts << {:long=>'coverage', :short=>'c', :type=>'uint', :mandatory=>true, :default=>100, :allowed=>nil, :disallowed=>'0'}
 
-bp = Biopieces.new
-
-options = bp.parse(ARGV, casts)
+options = Biopieces.options_parse(ARGV, casts)
 
-bp.each_record do |record|
-  if record.has_key? :SEQ and record[:SEQ].length >= options[:size]
-    entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    if record[:SEQ] and record[:SEQ].length >= options[:size]
+      entry      = Seq.new_bp(record)
+      entry.type = :dna
 
-    entry.shred(options[:size], options[:coverage]) do |subentry|
-      bp.puts subentry.to_bp
+      entry.shred(options[:size], options[:coverage]) do |subentry|
+        output.puts subentry.to_bp
+      end
     end
   end
 end
 
+
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<