# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-require 'biopieces'
-require 'seq'
+require 'maasha/biopieces'
+require 'maasha/seq'
require 'pp'
class Seq
# seq.shred { |subseq| } -> Seq
# seq.shred -> []
def shred(size, max_cov)
+ raise SeqError, "Size: #{size} < 1" if size < 1
+ raise SeqError, "Max coverage: #{max_cov} < 1" if max_cov < 1
+
entries = []
sum = 0
strand = '+'
- while (sum / self.seq.length) < max_cov
- start = rand(self.seq.size - size)
- stop = start + size - 1
- seq_name = self.seq_name + "[#{start + 1}-#{stop + 1}:#{strand}]"
- seq = self.seq[start .. stop]
- entry = Seq.new(seq_name, seq, 'dna')
-
- entry.revcomp if strand == '-'
+ while coverage(sum) < max_cov
+ entry = self.subseq_rand(size)
+ entry.reverse!.complement! if strand == '-'
if block_given?
yield entry
entries
end
+
+ private
+
+ # Method that returns the coverage of subsequences.
+ def coverage(sum)
+ sum / self.seq.length
+ end
end
casts = []
casts << {:long=>'size', :short=>'s', :type=>'uint', :mandatory=>true, :default=>500, :allowed=>nil, :disallowed=>'0'}
casts << {:long=>'coverage', :short=>'c', :type=>'uint', :mandatory=>true, :default=>100, :allowed=>nil, :disallowed=>'0'}
-bp = Biopieces.new
-
-options = bp.parse(ARGV, casts)
+options = Biopieces.options_parse(ARGV, casts)
-bp.each_record do |record|
- if record.has_key? :SEQ
- entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record.has_key? :SEQ and record[:SEQ].length >= options[:size]
+ entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+ entry.type = 'dna'
- entry.shred(options[:size], options[:coverage]) do |subentry|
- bp.puts subentry.to_bp
+ entry.shred(options[:size], options[:coverage]) do |subentry|
+ output.puts subentry.to_bp
+ end
end
end
end
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<