From 1aaa3e635bdbfed13485bb1e87b0c6e9b137bb87 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 16 Sep 2010 10:09:30 +0000 Subject: [PATCH] added pcr_seq Biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@1092 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/pcr_seq | 201 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100755 bp_bin/pcr_seq diff --git a/bp_bin/pcr_seq b/bp_bin/pcr_seq new file mode 100755 index 0000000..ff930cc --- /dev/null +++ b/bp_bin/pcr_seq @@ -0,0 +1,201 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Runs virtual PCR on sequences in the stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +require 'biopieces' +require 'fasta' +require 'seq' +require 'pp' + +class Pcr + def initialize(tmpdir, infile, options) + @infile = infile + + pattern = Pattern.new(options[:forward], options[:reverse], options[:max_dist]) + @pat_files = pattern.save(tmpdir) + end + + # Run scan_for_matches using different pattern files on the subject + # FASTA file and return a list of the result files. + def run + outfiles = [] + + @pat_files.each do |pat_file| + outfile = pat_file.sub("pat", "fna") + + command = "scan_for_matches" + command << " -c" + command << " #{pat_file}" + command << " < #{@infile}" + command << " > #{outfile}" + + system(command) + raise "Command failed: #{command}" unless $?.success? + + outfiles << outfile + end + + outfiles + end +end + +class Pattern + attr_accessor :forward, :reverse + + def initialize(forward, reverse, max_dist) + @forward = forward + @reverse = reverse + @max_dist = max_dist + end + + # For each primer pair we need to check 4 possible + # combinations that can give rise to PCR products: + # - forward and reverse (the intended product) + # - forward and revcomp forward + # - revcomp reverse and reverse + # - revcomp reverse and revcomp forward + # Thus we create 4 pattern files and return + # the file names. + def save(tmpdir) + forward = @forward + reverse = @reverse + revcomp_forward = revcomp(forward) + revcomp_reverse = revcomp(reverse) + + files = [] + + file = File.join(tmpdir, "forward_reverse.pat") + self.forward = forward + self.reverse = reverse + save_pattern(file) + files << file + + file = File.join(tmpdir, "forward_forward.pat") + self.forward = forward + self.reverse = revcomp_forward + save_pattern(file) + files << file + + file = File.join(tmpdir, "reverse_reverse.pat") + self.forward = revcomp_reverse + self.reverse = reverse + save_pattern(file) + files << file + + file = File.join(tmpdir, "reverse_forward.pat") + self.forward = revcomp_reverse + self.reverse = revcomp_forward + save_pattern(file) + files << file + + files + end + + private + + # Method to output a pattern. + def to_s + "#{@forward} 1 ... #{@max_dist} #{@reverse}" + end + + # Save a pattern to file + def save_pattern(file) + File.open(file, mode="w") do |ios| + ios.puts self + end + end + + # Split a primer pattern in the form of ATCG[3,2,1] into + # sequence and match descriptor, reverse complement the + # primer and append the match descriptor: CGAT[3,2,1]. + def revcomp(pattern) + if pattern.match(/^(\w+)(\[.+\])?/) + primer = $1 + descriptor = $2 + else + raise "Failed splitting pattern: #{pattern}" + end + + seq = Seq.new + seq.seq = primer + seq.type = 'dna' + seq.revcomp + + descriptor ? seq.seq + descriptor : seq.seq + end +end + +casts = [] +casts << {:long=>'forward', :short=>'f', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'reverse', :short=>'r', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'max_dist', :short=>'m', :type=>'uint', :mandatory=>true, :default=>5000, :allowed=>nil, :disallowed=>"0"} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) +tmpdir = bp.mktmpdir +infile = File.join(tmpdir, "in.fna") + +Fasta.open(infile, mode="w") do |ios| + bp.each_record do |record| + bp.puts record + ios.puts record + end +end + +outfiles = Pcr.new(tmpdir, infile, options).run + +outfiles.each do |outfile| + Fasta.open(outfile, mode="r") do |ios| + ios.each do |entry| + record = entry.to_bp + record[:REC_TYPE] = "PCR" + record[:STRAND] = "+" + record[:TYPE] = File.basename(outfile).sub(".fna", "").upcase + record[:SEQ_NAME].match(/(.+):\[(\d+),(\d+)\]$/) + record[:SEQ_NAME] = $1 + record[:PCR_BEG] = $2 + record[:PCR_END] = $3 + + if record[:PCR_BEG] > record[:PCR_END] + record[:PCR_BEG], record[:PCR_END] = record[:PCR_END], record[:PCR_BEG] + record[:STRAND] = "-" + end + + bp.puts record + end + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ -- 2.39.2