git.donarmstrong.com Git - biopieces.git/commitdiff
refactored shred_seq
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 3 Feb 2011 19:04:29 +0000 (19:04 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 3 Feb 2011 19:04:29 +0000 (19:04 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1253 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/shred_seq
bp_test/out/shred_seq.out.1

index 6c1a8d0623adfefa5c288178297b73c2b79f7622..f9e1a42d1181fe204cfadf8cde7c232fbb505aa0 100755 (executable)
@@ -49,8 +49,9 @@ class Seq
     sum     = 0
     strand  = '+'
 
-    while (sum / self.seq.length) < max_cov
-      entry = get_random_seq(size, strand)
+    while coverage(sum) < max_cov
+      entry = self.subseq_rand(size)
+      entry.revcomp if strand == '-'
 
       if block_given?
         yield entry
@@ -68,25 +69,9 @@ class Seq
 
   private
 
-  # Method that picks a random subsequence of a given size from a sequence.
-  # The position of the subsequence is appended to the sequence name along
-  # with the strand. The sequence is reverse complemented in case of minus
-  # strand.
-  def get_random_seq(size, strand)
-    if self.seq.size - size == 0
-      start = 0
-    else
-      start = rand(self.seq.size - size)
-    end
-
-    stop     = start + size - 1
-    seq_name = self.seq_name + "[#{start + 1}-#{stop + 1}:#{strand}]"
-    seq      = self.seq[start .. stop]
-    entry    = Seq.new(seq_name, seq, 'dna')
-
-    entry.revcomp if strand == '-'
-
-    entry
+  # Method that returns the coverage of subsequences.
+  def coverage(sum)
+    sum / self.seq.length
   end
 end
 
@@ -100,7 +85,8 @@ options = bp.parse(ARGV, casts)
 
 bp.each_record do |record|
   if record.has_key? :SEQ and record[:SEQ].length >= options[:size]
-    entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+    entry      = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+    entry.type = 'dna'
 
     entry.shred(options[:size], options[:coverage]) do |subentry|
       bp.puts subentry.to_bp
index e5902cdc1aae1087b61042b62d1bda288b72efb6..c3d3321cebc8c1ad8d9e0d36fb10b69f0177646c 100644 (file)
@@ -1,8 +1,8 @@
-SEQ_NAME: test[1-18:+]
+SEQ_NAME: test
 SEQ: ATGCACATTCGACTAGCA
 SEQ_LEN: 18
 ---
-SEQ_NAME: test[1-18:-]
+SEQ_NAME: test
 SEQ: TGCTAGTCGAATGTGCAT
 SEQ_LEN: 18
 ---