git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/shred_seq
refactored shred_seq
[biopieces.git] / bp_bin / shred_seq
index 6c1a8d0623adfefa5c288178297b73c2b79f7622..f9e1a42d1181fe204cfadf8cde7c232fbb505aa0 100755 (executable)
@@ -49,8 +49,9 @@ class Seq
     sum     = 0
     strand  = '+'
 
-    while (sum / self.seq.length) < max_cov
-      entry = get_random_seq(size, strand)
+    while coverage(sum) < max_cov
+      entry = self.subseq_rand(size)
+      entry.revcomp if strand == '-'
 
       if block_given?
         yield entry
@@ -68,25 +69,9 @@ class Seq
 
   private
 
-  # Method that picks a random subsequence of a given size from a sequence.
-  # The position of the subsequence is appended to the sequence name along
-  # with the strand. The sequence is reverse complemented in case of minus
-  # strand.
-  def get_random_seq(size, strand)
-    if self.seq.size - size == 0
-      start = 0
-    else
-      start = rand(self.seq.size - size)
-    end
-
-    stop     = start + size - 1
-    seq_name = self.seq_name + "[#{start + 1}-#{stop + 1}:#{strand}]"
-    seq      = self.seq[start .. stop]
-    entry    = Seq.new(seq_name, seq, 'dna')
-
-    entry.revcomp if strand == '-'
-
-    entry
+  # Method that returns the coverage, i.e. the given sum divided by the sequence length.
+  def coverage(sum)
+    sum / self.seq.length
   end
 end
 
@@ -100,7 +85,8 @@ options = bp.parse(ARGV, casts)
 
 bp.each_record do |record|
   if record.has_key? :SEQ and record[:SEQ].length >= options[:size]
-    entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+    entry      = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+    entry.type = 'dna'
 
     entry.shred(options[:size], options[:coverage]) do |subentry|
       bp.puts subentry.to_bp