X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fpcr_seq;h=6afbb343f3e497595261f4cd785a558ff0ce97b9;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=df97f20bb7ea3487d6cf15de68d2d2cb404fa31e;hpb=494dc53ebd515b1e3e9b91bbebf43059899ca4ce;p=biopieces.git diff --git a/bp_bin/pcr_seq b/bp_bin/pcr_seq index df97f20..6afbb34 100755 --- a/bp_bin/pcr_seq +++ b/bp_bin/pcr_seq @@ -32,7 +32,6 @@ require 'maasha/biopieces' require 'maasha/fasta' require 'maasha/seq' -require 'pp' class Pcr def initialize(tmpdir, infile, options) @@ -52,6 +51,7 @@ class Pcr command = "scan_for_matches" # command << " -c" + command << " -o 1" command << " #{pat_file}" command << " < #{@infile}" command << " > #{outfile}" @@ -69,6 +69,25 @@ end class Pattern attr_accessor :forward, :reverse + # Split a primer pattern in the form of ATCG[3,2,1] into + # sequence and match descriptor, reverse complement the + # primer and append the match descriptor: CGAT[3,2,1]. + def self.revcomp(pattern) + if pattern.match(/^(\w+)(\[.+\])?/) + primer = $1 + descriptor = $2 + else + raise "Failed splitting pattern: #{pattern}" + end + + seq = Seq.new + seq.seq = primer + seq.type = :dna + seq.reverse!.complement! + + descriptor ? seq.seq + descriptor : seq.seq + end + def initialize(forward, reverse, max_dist) @forward = forward @reverse = reverse @@ -86,8 +105,8 @@ class Pattern def save(tmpdir) forward = @forward reverse = @reverse - revcomp_forward = revcomp(forward) - revcomp_reverse = revcomp(reverse) + revcomp_forward = Pattern.revcomp(forward) + revcomp_reverse = Pattern.revcomp(reverse) files = [] @@ -127,69 +146,65 @@ class Pattern # Save a pattern to file def save_pattern(file) - File.open(file, mode="w") do |ios| + File.open(file, "w") do |ios| ios.puts self end end - - # Split a primer pattern in the form of ATCG[3,2,1] into - # sequence and match descriptor, reverse complement the - # primer and append the match descriptor: CGAT[3,2,1]. - def revcomp(pattern) - if pattern.match(/^(\w+)(\[.+\])?/) - primer = $1 - descriptor = $2 - else - raise "Failed splitting pattern: #{pattern}" - end - - seq = Seq.new - seq.seq = primer - seq.type = 'dna' - seq.revcomp - - descriptor ? seq.seq + descriptor : seq.seq - end end casts = [] -casts << {:long=>'forward', :short=>'f', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'reverse', :short=>'r', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'max_dist', :short=>'m', :type=>'uint', :mandatory=>true, :default=>5000, :allowed=>nil, :disallowed=>"0"} - -bp = Biopieces.new - -options = bp.parse(ARGV, casts) -tmpdir = bp.mktmpdir +casts << {:long=>'forward', :short=>'f', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'forward_rc', :short=>'F', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'reverse', :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'reverse_rc', :short=>'R', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'max_dist', :short=>'m', :type=>'uint', :mandatory=>true, :default=>5000, :allowed=>nil, :disallowed=>"0"} + +options = Biopieces.options_parse(ARGV, casts) +tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") -Fasta.open(infile, mode="w") do |ios| - bp.each_record do |record| - bp.puts record - ios.puts record - end +if options[:forward_rc] + options[:forward] = Pattern.revcomp(options[:forward_rc]) +end + +if options[:reverse_rc] + options[:reverse] = Pattern.revcomp(options[:reverse_rc]) end -outfiles = Pcr.new(tmpdir, infile, options).run - -outfiles.each do |outfile| - Fasta.open(outfile, mode="r") do |ios| - ios.each do |entry| - record = entry.to_bp - record[:REC_TYPE] = "PCR" - record[:STRAND] = "+" - record[:TYPE] = File.basename(outfile).sub(".fna", "").upcase - record[:SEQ_NAME].match(/(.+):\[(\d+),(\d+)\]$/) - record[:SEQ_NAME] = $1 - record[:PCR_BEG] = $2 - record[:PCR_END] = $3 - - if record[:PCR_BEG] > record[:PCR_END] - record[:PCR_BEG], record[:PCR_END] = record[:PCR_END], record[:PCR_BEG] - record[:STRAND] = "-" +raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse] +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + Fasta.open(infile, "w") do |ios| + input.each_record do |record| + output.puts record + + if record[:SEQ] + entry = Seq.new_bp(record) + ios.puts entry.to_fasta end + end + end - bp.puts record + outfiles = Pcr.new(tmpdir, infile, options).run + + outfiles.each do |outfile| + Fasta.open(outfile, "r") do |ios| + ios.each do |entry| + record = entry.to_bp + record[:REC_TYPE] = "PCR" + record[:STRAND] = "+" + record[:TYPE] = File.basename(outfile).sub(".fna", "").upcase + record[:SEQ_NAME].match(/(.+):\[(\d+),(\d+)\]$/) + record[:SEQ_NAME] = $1 + record[:PCR_BEG] = $2.to_i + record[:PCR_END] = $3.to_i + + if record[:PCR_BEG] > record[:PCR_END] + record[:PCR_BEG], record[:PCR_END] = record[:PCR_END], record[:PCR_BEG] + record[:STRAND] = "-" + end + + output.puts record + end end end end