From: martinahansen Date: Fri, 29 Nov 2013 15:08:04 +0000 (+0000) Subject: added slice_align biopiece X-Git-Url: https://git.donarmstrong.com/?p=biopieces.git;a=commitdiff_plain;h=04c4ab8721f07155ac6b02d619c9623f82948ec6 added slice_align biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@2259 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/slice_align b/bp_bin/slice_align new file mode 100755 index 0000000..586c270 --- /dev/null +++ b/bp_bin/slice_align @@ -0,0 +1,133 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Join sequences in the stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'maasha/biopieces' +require 'maasha/fasta' +require 'maasha/seq' + +casts = [] +casts << {long: 'beg', short: 'b', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} +casts << {long: 'end', short: 'e', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} +casts << {long: 'forward', short: 'f', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'reverse', short: 'r', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'reverse_rc', short: 'R', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'template_file', short: 't', type: 'file!', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'mismatches', short: 'm', type: 'uint', mandatory: false, default: 2, allowed: nil, disallowed: nil} +casts << {long: 'insertions', short: 'i', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: nil} +casts << {long: 'deletions', short: 'd', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: nil} + +options = Biopieces.options_parse(ARGV, casts) + +if options[:beg] + raise "both --beg and --end must be speficied" unless options[:end] + options[:beg] -= 1 + options[:end] -= 1 + raise "--beg (#{options[:beg]}) must be less than --end (#{options[:end]})" if options[:beg] > options[:end] +elsif options[:forward] + raise "both --forward and --reverse or --reverse_rc must be specified" unless options[:reverse] or options[:reverse_rc] + + if options[:reverse_rc] + options[:reverse] = Seq.new(seq: options[:reverse_rc], type: :dna).reverse.complement.seq + end +else + raise "either --beg/--end or --forward/--reverse|--reverse_rc must be specified" +end + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] + entry = Seq.new(seq: record[:SEQ]) + + unless options[:beg] + compact = Seq.new(seq: entry.seq.dup) + compact.seq.delete! "-.~" + + fmatch = compact.patmatch(options[:forward], + max_mismatches: options[:mismatches], + max_insertions: options[:insertions], + max_deletions: options[:deletions]) + + raise "forward primer: #{options[:forward]} not found" if fmatch.nil? + + rmatch = compact.patmatch(options[:reverse], + max_mismatches: options[:mismatches], + max_insertions: options[:insertions], + max_deletions: options[:deletions]) + + raise "reverse primer: #{options[:reverse]} not found" if rmatch.nil? + + mbeg = fmatch.pos + mend = rmatch.pos + rmatch.length - 1 + + indels = Regexp.new(/-|\.|~/) + + i = 0 + + while entry.seq[i] + unless entry.seq[i].match indels + if mbeg > 0 + mbeg -= 1 + mend -= 1 + else + options[:beg] = i + break + end + end + + i += 1 + end + + while entry.seq[i] + unless entry.seq[i].match indels + if mend > 0 + mend -= 1 + else + options[:end] = i + break + end + end + + i += 1 + end + end + + record[:SEQ] = entry[options[:beg] .. options[:end]].seq + record[:SEQ_LEN] = record[:SEQ].length + end + + output.puts record + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__