#!/usr/bin/env ruby
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
require 'pp'
require 'maasha/biopieces'
-require 'maasha/fasta'
-
-# Error class for PatScan errors.
-class PatScanError < StandardError; end;
-
-class PatScan
- def initialize(options, file_fasta, file_pattern, file_patscan)
- @options = options
- @file_fasta = file_fasta
- @file_pattern = file_pattern
- @file_patscan = file_patscan
-
- pat = Pattern.new(@options)
- pat.write(@file_pattern)
- end
-
- def run
- commands = []
- commands << "nice -n 19"
- commands << "scan_for_matches"
- commands << @file_pattern
- commands << "< #{@file_fasta}"
- commands << "> #{@file_patscan}"
- command = commands.join(" ")
- system(command)
- raise PatScanError, "Command failed: #{command}" unless $?.success?
- end
-
- def parse_results
- matches = {}
-
- Fasta.open(@file_patscan, mode='r') do |ios|
- ios.each do |entry|
- if entry.seq_name =~ /^(\d+):\[(\d+),(\d+)\]$/
- name = $1.to_i
- start = $2.to_i - 1
- stop = $3.to_i - 1
- matches[name] = [start, stop - start + 1] unless matches.has_key? name
- else
- raise "Failed to parse sequence name: #{entry.seq_name}"
- end
- end
- end
+require 'maasha/seq'
+require 'maasha/seq/backtrack'
- matches
- end
+def percent2real(length, percent)
+ (length * percent * 0.01).round
end
-# Error class for Pattern errors.
-class PatternError < StandardError; end;
+include BackTrack
-class Pattern
- def initialize(options)
- @options = options
- @patterns = []
- @patterns << pattern_internal
- @patterns += patterns_end if @options[:partial]
- end
-
- def to_i
- new_patterns = []
-
- while @patterns.size > 1
- new_patterns = @patterns[0 ... -2]
- new_patterns << "( #{@patterns[-2 .. -1].join(' | ')} )"
+casts = []
+casts << {:long=>'forward', :short=>'f', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'forward_rc', :short=>'F', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'reverse', :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'reverse_rc', :short=>'R', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'len_forward', :short=>'l', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'len_reverse', :short=>'L', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'mismatches', :short=>'m', :type=>'uint', :mandatory=>false, :default=>10, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'insertions', :short=>'i', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'deletions', :short=>'d', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
- @patterns = new_patterns
- end
+options = Biopieces.options_parse(ARGV, casts)
- @patterns.first
- end
+if options[:forward_rc]
+ options[:forward] = Seq.new("test", options[:forward_rc], 'dna').revcomp.seq
+end
- def write(file)
- File.open(file, mode='w') do |ios|
- ios.puts self.to_i
- end
- end
+if options[:reverse_rc]
+ options[:reverse] = Seq.new("test", options[:reverse_rc], 'dna').revcomp.seq
+end
- private
+raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse]
- def pattern_internal
- pattern = @options[:adaptor]
- mis = mis_count(pattern)
- ins = ins_count(pattern)
- del = del_count(pattern)
+if options[:forward]
+ options[:len_forward] = options[:forward].length unless options[:len_forward]
- "#{pattern}[#{mis},#{ins},#{del}]"
+ if options[:len_forward] > options[:forward].length
+ raise ArgumentError, "len_forward > forward adaptor (#{options[:len_forward]} > #{options[:forward].length})"
end
- def patterns_end
- patterns = []
- adaptor = @options[:adaptor]
-
- raise PatternError, "len > adaptor length: #{@options[:len]} > #{adaptor.length - 1}" if @options[:len] > adaptor.length - 1
-
- (adaptor.length - 1).downto(@options[:len]) do |i|
- pattern = adaptor[0 ... i]
- mis = mis_count(pattern)
- ins = ins_count(pattern)
- del = del_count(pattern)
- patterns << "#{pattern}[#{mis},#{ins},#{del}] $"
- end
+ fmis = percent2real(options[:forward].length, options[:mismatches])
+ fins = percent2real(options[:forward].length, options[:insertions])
+ fdel = percent2real(options[:forward].length, options[:deletions])
+end
- patterns
- end
+if options[:reverse]
+ options[:len_reverse] = options[:reverse].length unless options[:len_reverse]
- def mis_count(pattern)
- (pattern.length * @options[:mismatches] * 0.01).round
+ if options[:len_reverse] > options[:reverse].length
+ raise ArgumentError, "len_reverse > reverse adaptor (#{options[:len_reverse]} > #{options[:reverse].length})"
end
- def ins_count(pattern)
- (pattern.length * @options[:insertions] * 0.01).round
- end
-
- def del_count(pattern)
- (pattern.length * @options[:deletions] * 0.01).round
- end
+ rmis = percent2real(options[:reverse].length, options[:mismatches])
+ rins = percent2real(options[:reverse].length, options[:insertions])
+ rdel = percent2real(options[:reverse].length, options[:deletions])
end
-casts = []
-casts << {:long=>'adaptor', :short=>'a', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'partial', :short=>'p', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'len', :short=>'l', :type=>'uint', :mandatory=>false, :default=>10, :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'mismatches', :short=>'m', :type=>'uint', :mandatory=>false, :default=>10, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'insertions', :short=>'i', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'deletions', :short=>'d', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
-
-options = Biopieces.options_parse(ARGV, casts)
-
-tmpdir = Biopieces.mktmpdir
-file_fasta = File.join(tmpdir, "data.fna")
-file_records = File.join(tmpdir, "data.stream")
-file_pattern = File.join(tmpdir, "pattern.txt")
-file_patscan = File.join(tmpdir, "patscan.fna")
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ input.each do |record|
+ if record[:SEQ] and record[:SEQ].length > 0
+ entry = Seq.new_bp(record)
+
+ if options[:forward]
+ if m = entry.patmatch(options[:forward], 0, fmis, fins, fdel)
+ record[:ADAPTOR_POS_LEFT] = m.pos
+ record[:ADAPTOR_LEN_LEFT] = m.length
+ record[:ADAPTOR_PAT_LEFT] = m.match
+ elsif options[:len_forward] < options[:forward].length
+ len = options[:forward].length - 1
+ pat = options[:forward]
+
+ while len >= options[:len_forward]
+ fmis = percent2real(len, options[:mismatches])
+ fins = percent2real(len, options[:insertions])
+ fdel = percent2real(len, options[:deletions])
+
+ pat = pat[1 ... pat.length]
+
+ if m = entry.patmatch(pat, [0, len], fmis, fins, fdel)
+ record[:ADAPTOR_POS_LEFT] = m.pos
+ record[:ADAPTOR_LEN_LEFT] = m.length
+ record[:ADAPTOR_PAT_LEFT] = m.match
+
+ break
+ end
+
+ len -= 1
+ end
+ end
+ end
-count = 0
+ if options[:reverse]
+ if m = entry.patmatch(options[:reverse], 0, rmis, rins, rdel)
+ record[:ADAPTOR_POS_RIGHT] = m.pos
+ record[:ADAPTOR_LEN_RIGHT] = m.length
+ record[:ADAPTOR_PAT_RIGHT] = m.match
+ elsif options[:len_reverse] < options[:reverse].length
+ len = options[:reverse].length - 1
+ pat = options[:reverse]
-Biopieces.open(options[:stream_in], file_records) do |input, output|
- Fasta.open(file_fasta, mode='w') do |out_fa|
- input.each do |record|
- output.puts record
+ while len >= options[:len_reverse]
+ rmis = percent2real(len, options[:mismatches])
+ rins = percent2real(len, options[:insertions])
+ rdel = percent2real(len, options[:deletions])
- if record.has_key? :SEQ
- record[:SEQ_NAME] = count
- out_fa.puts record
+ pat = pat[0 ... pat.length - 1]
- count += 1;
- end
- end
- end
-end
-
-patscan = PatScan.new(options, file_fasta, file_pattern, file_patscan)
-patscan.run
-matches = patscan.parse_results
+ if m = entry.patmatch(pat, entry.length - len, rmis, rins, rdel)
+ record[:ADAPTOR_POS_RIGHT] = m.pos
+ record[:ADAPTOR_LEN_RIGHT] = m.length
+ record[:ADAPTOR_PAT_RIGHT] = m.match
-count = 0
+ break
+ end
-Biopieces.open(file_records, options[:stream_out]) do |input, output|
- input.each_record do |record|
- if record.has_key? :SEQ
- if matches.has_key? count
- record[:ADAPTOR_POS] = matches[count].first
- record[:ADAPTOR_LEN] = matches[count].last
+ len -= 1
+ end
+ end
end
-
- count += 1;
end
output.puts record
end
end
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<