3 # Copyright (C) 2007-2010 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This program is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Slice aligned sequences in the stream to obtain subsequences.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
31 require 'maasha/biopieces'
32 require 'maasha/fasta'
35 indels = Regexp.new(/-|\.|~/)
38 casts << {long: 'beg', short: 'b', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"}
39 casts << {long: 'end', short: 'e', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"}
40 casts << {long: 'forward', short: 'f', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
41 casts << {long: 'forward_rc', short: 'F', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
42 casts << {long: 'reverse', short: 'r', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
43 casts << {long: 'reverse_rc', short: 'R', type: 'string', mandatory: false, default: nil, allowed: nil, disallowed: nil}
44 casts << {long: 'template_file', short: 't', type: 'file!', mandatory: false, default: nil, allowed: nil, disallowed: nil}
45 casts << {long: 'mismatches', short: 'm', type: 'uint', mandatory: false, default: 2, allowed: nil, disallowed: nil}
46 casts << {long: 'insertions', short: 'i', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: nil}
47 casts << {long: 'deletions', short: 'd', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: nil}
49 options = Biopieces.options_parse(ARGV, casts)
52 raise "both --beg and --end must be speficied" unless options[:end]
55 raise "--beg (#{options[:beg]}) must be less than --end (#{options[:end]})" if options[:beg] > options[:end]
56 elsif options[:forward] or options[:forward_rc]
57 raise "both --forward and --reverse or --reverse_rc must be specified" unless options[:reverse] or options[:reverse_rc]
59 if options[:forward_rc]
60 options[:forward] = Seq.new(seq: options[:forward_rc], type: :dna).reverse.complement.seq
63 if options[:reverse_rc]
64 options[:reverse] = Seq.new(seq: options[:reverse_rc], type: :dna).reverse.complement.seq
67 raise "either --beg/--end or --forward/--reverse|--reverse_rc must be specified"
70 if options[:template_file]
71 template = Fasta.open(options[:template_file]).get_entry
79 unless template.seq[i].match indels
93 unless template.seq[i].match indels
107 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
108 input.each_record do |record|
110 entry = Seq.new(seq: record[:SEQ])
113 compact = template ? template : Seq.new(seq: entry.seq.dup)
114 compact.seq.delete! "-.~"
116 fmatch = compact.patmatch(options[:forward],
117 max_mismatches: options[:mismatches],
118 max_insertions: options[:insertions],
119 max_deletions: options[:deletions])
121 raise "forward primer: #{options[:forward]} not found" if fmatch.nil?
123 rmatch = compact.patmatch(options[:reverse],
124 max_mismatches: options[:mismatches],
125 max_insertions: options[:insertions],
126 max_deletions: options[:deletions])
128 raise "reverse primer: #{options[:reverse]} not found" if rmatch.nil?
131 mend = rmatch.pos + rmatch.length - 1
136 unless entry.seq[i].match indels
150 unless entry.seq[i].match indels
163 record[:SEQ] = entry[options[:beg] .. options[:end]].seq
164 record[:SEQ_LEN] = record[:SEQ].length
172 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<