X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fslice_align;h=ed561981869c7549acfd4e5cea8e0d1b0fd5d6c5;hb=31c64ec40cca2f80c424068b69327832be625e52;hp=586c2701570dfbd46d546a3cdf0e3bc012f731e5;hpb=04c4ab8721f07155ac6b02d619c9623f82948ec6;p=biopieces.git diff --git a/bp_bin/slice_align b/bp_bin/slice_align index 586c270..ed56198 100755 --- a/bp_bin/slice_align +++ b/bp_bin/slice_align @@ -24,7 +24,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Join sequences in the stream. +# Slice aligned sequences in the stream to obtain subsequences. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -32,10 +32,13 @@ require 'maasha/biopieces' require 'maasha/fasta' require 'maasha/seq' +indels = Regexp.new(/-|\.|~/) + casts = [] casts << {long: 'beg', short: 'b', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} casts << {long: 'end', short: 'e', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} casts << {long: 'forward', short: 'f', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'forward_rc', short: 'F', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} casts << {long: 'reverse', short: 'r', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} casts << {long: 'reverse_rc', short: 'R', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} casts << {long: 'template_file', short: 't', type: 'file!', mandatory: false, default: nil, allowed: nil, disallowed: nil} @@ -50,9 +53,13 @@ if options[:beg] options[:beg] -= 1 options[:end] -= 1 raise "--beg (#{options[:beg]}) must be less than --end (#{options[:end]})" if options[:beg] > options[:end] -elsif options[:forward] +elsif options[:forward] or options[:forward_rc] raise "both --forward and --reverse or --reverse_rc must be specified" unless options[:reverse] or options[:reverse_rc] + if options[:forward_rc] + options[:forward] = Seq.new(seq: options[:forward_rc], type: :dna).reverse.complement.seq + end + if options[:reverse_rc] options[:reverse] = Seq.new(seq: options[:reverse_rc], type: :dna).reverse.complement.seq end @@ -60,13 +67,51 @@ else raise "either --beg/--end or --forward/--reverse|--reverse_rc must be specified" end +if options[:template_file] + template = Fasta.open(options[:template_file]).get_entry + + if options[:beg] + mbeg = options[:beg] + mend = options[:end] + i = 0 + + while template.seq[i] + unless template.seq[i].match indels + if mbeg > 0 + mbeg -= 1 + mend -= 1 + else + options[:beg] = i + break + end + end + + i += 1 + end + + while template.seq[i] + unless template.seq[i].match indels + if mend > 0 + mend -= 1 + else + options[:end] = i + break + end + end + + i += 1 + end + end +end + Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| if record[:SEQ] entry = Seq.new(seq: record[:SEQ]) unless options[:beg] - compact = Seq.new(seq: entry.seq.dup) + raise "template length != alignment length" if template and template.length != entry.length + compact = template ? template : Seq.new(seq: entry.seq.dup) compact.seq.delete! "-.~" fmatch = compact.patmatch(options[:forward], @@ -86,8 +131,6 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| mbeg = fmatch.pos mend = rmatch.pos + rmatch.length - 1 - indels = Regexp.new(/-|\.|~/) - i = 0 while entry.seq[i]