# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Join sequences in the stream.
+# Slice aligned sequences in the stream to obtain subsequences.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'maasha/fasta'
require 'maasha/seq'
+indels = Regexp.new(/-|\.|~/)
+
casts = []
casts << {long: 'beg', short: 'b', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"}
casts << {long: 'end', short: 'e', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"}
casts << {long: 'forward', short: 'f', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'forward_rc', short: 'F', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
casts << {long: 'reverse', short: 'r', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
casts << {long: 'reverse_rc', short: 'R', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
casts << {long: 'template_file', short: 't', type: 'file!', mandatory: false, default: nil, allowed: nil, disallowed: nil}
options[:beg] -= 1
options[:end] -= 1
raise "--beg (#{options[:beg]}) must be less than --end (#{options[:end]})" if options[:beg] > options[:end]
-elsif options[:forward]
+elsif options[:forward] or options[:forward_rc]
raise "both --forward and --reverse or --reverse_rc must be specified" unless options[:reverse] or options[:reverse_rc]
+ if options[:forward_rc]
+ options[:forward] = Seq.new(seq: options[:forward_rc], type: :dna).reverse.complement.seq
+ end
+
if options[:reverse_rc]
options[:reverse] = Seq.new(seq: options[:reverse_rc], type: :dna).reverse.complement.seq
end
raise "either --beg/--end or --forward/--reverse|--reverse_rc must be specified"
end
+if options[:template_file]
+ template = Fasta.open(options[:template_file]).get_entry
+
+ if options[:beg]
+ mbeg = options[:beg]
+ mend = options[:end]
+ i = 0
+
+ while template.seq[i]
+ unless template.seq[i].match indels
+ if mbeg > 0
+ mbeg -= 1
+ mend -= 1
+ else
+ options[:beg] = i
+ break
+ end
+ end
+
+ i += 1
+ end
+
+ while template.seq[i]
+ unless template.seq[i].match indels
+ if mend > 0
+ mend -= 1
+ else
+ options[:end] = i
+ break
+ end
+ end
+
+ i += 1
+ end
+ end
+end
+
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record[:SEQ]
entry = Seq.new(seq: record[:SEQ])
unless options[:beg]
- compact = Seq.new(seq: entry.seq.dup)
+ raise "template length != alignment length" if template and template.length != entry.length
+ compact = template ? template : Seq.new(seq: entry.seq.dup)
compact.seq.delete! "-.~"
fmatch = compact.patmatch(options[:forward],
mbeg = fmatch.pos
mend = rmatch.pos + rmatch.length - 1
- indels = Regexp.new(/-|\.|~/)
-
i = 0
while entry.seq[i]