From 674f7ca55a64bce4d447c0e5a51f43d6c70df07d Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 3 Feb 2011 19:04:29 +0000 Subject: [PATCH] refactored shred_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@1253 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/shred_seq | 30 ++++++++---------------------- bp_test/out/shred_seq.out.1 | 4 ++-- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/bp_bin/shred_seq b/bp_bin/shred_seq index 6c1a8d0..f9e1a42 100755 --- a/bp_bin/shred_seq +++ b/bp_bin/shred_seq @@ -49,8 +49,9 @@ class Seq sum = 0 strand = '+' - while (sum / self.seq.length) < max_cov - entry = get_random_seq(size, strand) + while coverage(sum) < max_cov + entry = self.subseq_rand(size) + entry.revcomp if strand == '-' if block_given? yield entry @@ -68,25 +69,9 @@ class Seq private - # Method that picks a random subsequence of a given size from a sequence. - # The position of the subsequence is appended to the sequence name along - # with the strand. The sequence is reverse complemented in case of minus - # strand. - def get_random_seq(size, strand) - if self.seq.size - size == 0 - start = 0 - else - start = rand(self.seq.size - size) - end - - stop = start + size - 1 - seq_name = self.seq_name + "[#{start + 1}-#{stop + 1}:#{strand}]" - seq = self.seq[start .. stop] - entry = Seq.new(seq_name, seq, 'dna') - - entry.revcomp if strand == '-' - - entry + # Method that returns the coverage of subsequences. + def coverage(sum) + sum / self.seq.length end end @@ -100,7 +85,8 @@ options = bp.parse(ARGV, casts) bp.each_record do |record| if record.has_key? :SEQ and record[:SEQ].length >= options[:size] - entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES]) + entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES]) + entry.type = 'dna' entry.shred(options[:size], options[:coverage]) do |subentry| bp.puts subentry.to_bp diff --git a/bp_test/out/shred_seq.out.1 b/bp_test/out/shred_seq.out.1 index e5902cd..c3d3321 100644 --- a/bp_test/out/shred_seq.out.1 +++ b/bp_test/out/shred_seq.out.1 @@ -1,8 +1,8 @@ -SEQ_NAME: test[1-18:+] +SEQ_NAME: test SEQ: ATGCACATTCGACTAGCA SEQ_LEN: 18 --- -SEQ_NAME: test[1-18:-] +SEQ_NAME: test SEQ: TGCTAGTCGAATGTGCAT SEQ_LEN: 18 --- -- 2.39.5