X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fslice_align;h=ed561981869c7549acfd4e5cea8e0d1b0fd5d6c5;hb=31c64ec40cca2f80c424068b69327832be625e52;hp=f63ac7e203c2e41c40c15d294fd11798e760b5d5;hpb=308da19f6dfbbf50bbce5aa87b32e2f269650c80;p=biopieces.git diff --git a/bp_bin/slice_align b/bp_bin/slice_align index f63ac7e..ed56198 100755 --- a/bp_bin/slice_align +++ b/bp_bin/slice_align @@ -32,10 +32,13 @@ require 'maasha/biopieces' require 'maasha/fasta' require 'maasha/seq' +indels = Regexp.new(/-|\.|~/) + casts = [] casts << {long: 'beg', short: 'b', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} casts << {long: 'end', short: 'e', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} casts << {long: 'forward', short: 'f', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'forward_rc', short: 'F', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} casts << {long: 'reverse', short: 'r', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} casts << {long: 'reverse_rc', short: 'R', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} casts << {long: 'template_file', short: 't', type: 'file!', mandatory: false, default: nil, allowed: nil, disallowed: nil} @@ -50,9 +53,13 @@ if options[:beg] options[:beg] -= 1 options[:end] -= 1 raise "--beg (#{options[:beg]}) must be less than --end (#{options[:end]})" if options[:beg] > options[:end] -elsif options[:forward] +elsif options[:forward] or options[:forward_rc] raise "both --forward and --reverse or --reverse_rc must be specified" unless options[:reverse] or options[:reverse_rc] + if options[:forward_rc] + options[:forward] = Seq.new(seq: options[:forward_rc], type: :dna).reverse.complement.seq + end + if options[:reverse_rc] options[:reverse] = Seq.new(seq: options[:reverse_rc], type: :dna).reverse.complement.seq end @@ -62,6 +69,39 @@ end if options[:template_file] template = Fasta.open(options[:template_file]).get_entry + + if options[:beg] + mbeg = options[:beg] + mend = options[:end] + i = 0 + + while template.seq[i] + unless template.seq[i].match indels + if mbeg > 0 + mbeg -= 1 + mend -= 1 + else + options[:beg] = i + break + end + end + + i += 1 + end + + while template.seq[i] + unless template.seq[i].match indels + if mend > 0 + mend -= 1 + else + options[:end] = i + break + end + end + + i += 1 + end + end end Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| @@ -70,6 +110,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| entry = Seq.new(seq: record[:SEQ]) unless options[:beg] + raise "template length != alignment length" if template and template.length != entry.length compact = template ? template : Seq.new(seq: entry.seq.dup) compact.seq.delete! "-.~" @@ -90,8 +131,6 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| mbeg = fmatch.pos mend = rmatch.pos + rmatch.length - 1 - indels = Regexp.new(/-|\.|~/) - i = 0 while entry.seq[i]