From da18acf38dd939cdec558c919e53dd966c83ce1f Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 13 Jul 2012 13:52:36 +0000 Subject: [PATCH] added find_orfs git-svn-id: http://biopieces.googlecode.com/svn/trunk@1867 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/find_orfs | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100755 bp_bin/find_orfs diff --git a/bp_bin/find_orfs b/bp_bin/find_orfs new file mode 100755 index 0000000..868665d --- /dev/null +++ b/bp_bin/find_orfs @@ -0,0 +1,70 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Determine the frequencies for k-mers in sequences in the stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +require 'maasha/biopieces' +require 'maasha/seq' + +casts = [] +casts << {:long=>'start_codons', :short=>'s', :type=>'list', :mandatory=>true, :default=>"AUG,GUG", :allowed=>nil, :disallowed=>nil} +casts << {:long=>'stop_codons', :short=>'S', :type=>'list', :mandatory=>true, :default=>"UAA,UGA,UAG", :allowed=>nil, :disallowed=>nil} +casts << {:long=>'min_size', :short=>'m', :type=>'uint', :mandatory=>true, :default=>50, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'max_size', :short=>'M', :type=>'uint', :mandatory=>true, :default=>10_000, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'non_redundant', :short=>'r', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} + +options = Biopieces.options_parse(ARGV, casts) + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] + entry = Seq.new_bp(record) + + entry.each_orf(options[:min_size], options[:max_size], options[:start_codons], options[:stop_codons], options[:non_redundant]) do |orf, orf_beg, orf_end| + new_record = {} + new_record[:REC_TYPE] = "ORF" + new_record.merge!(orf.to_bp) + new_record[:S_BEG] = orf_beg + new_record[:S_END] = orf_end + + output.puts new_record + end + + exit + end + + output.puts record + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ -- 2.39.5