X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Ffind_genes;h=88dacf2ddee0c7ec899a13cf6f008a7b6380e61b;hb=04c4ab8721f07155ac6b02d619c9623f82948ec6;hp=4ee726e974b0c994285a6570e975a20120d7330f;hpb=6313779656850c7e75095e5a800b7ff61096f7c4;p=biopieces.git diff --git a/bp_bin/find_genes b/bp_bin/find_genes index 4ee726e..88dacf2 100755 --- a/bp_bin/find_genes +++ b/bp_bin/find_genes @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,153 +18,50 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Find genes in sequences in stream. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Data::Dumper; -use Maasha::Common; -use Maasha::Biopieces; -use Maasha::Filesys; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Find genes in prokaryotic genomic sequences in the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $record, $tmp_dir, $tmp_file, $fh_out, $fh_in, $entry, $chunk, @lines, $line, $s_id, $type, $s_beg, $s_end, $strand, @fields, $def, - @commands, $command ); - -$options = Maasha::Biopieces::parse_options( - [ - { long => 'full', short => 'f', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - ] -); - -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); - -$tmp_dir = Maasha::Biopieces::get_tmpdir(); -$tmp_file = "$tmp_dir/tmp.seq"; -$fh_out = Maasha::Filesys::file_write_open( $tmp_file ); - -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) { - Maasha::Fasta::put_entry( $entry, $fh_out ); - } - - Maasha::Biopieces::put_record( $record, $out ); -} - -close $fh_out; - -push @commands, "-c" if $options->{ 'full' }; -push @commands, "< $tmp_file > $tmp_file.out"; -push @commands, "2> /dev/null" unless $options->{ 'verbose' }; - -$command = join " ", @commands; - -Maasha::Common::run( "prodigal", $command ); - -$fh_in = Maasha::Filesys::file_read_open( "$tmp_file.out" ); - -$/ = "//\n"; +require 'maasha/biopieces' +require 'maasha/fasta' +require 'maasha/prodigal' -while ( $chunk = <$fh_in> ) -{ - chomp $chunk; +casts = [] +casts << {:long=>'full', :short=>'f', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'procedure', :short=>'p', :type=>'string', :mandatory=>true, :default=>'single', :allowed=>'single,meta', :disallowed=>nil} - @lines = split /\n/, $chunk; +options = Biopieces.options_parse(ARGV, casts) - $line = shift @lines; - - if ( $line =~ /^DEFINITION\s+(.+)/ ) - { - $def = $1; - - if ( $def =~ /seqhdr="([^"]+)"/ ) { - $s_id = $1; - } else { - Maasha::Common::error( qq(BAD sequence header: $def) ); - } - - $line = shift @lines; - - if ( $line =~ /^FEATURES/ ) - { - foreach $line ( @lines ) - { - next if $line =~ /.+\//; - - @fields = split " ", $line; - - $type = $fields[ 0 ]; - - if ( $fields[ 1 ] =~ /complement/ ) { - $strand = "-"; - } else { - $strand = "+"; - } - - if ( $fields[ 1 ] =~ /(\d+)\.\.>?(\d+)/ ) - { - $s_beg = $1; - $s_end = $2; - } - else - { - Maasha::Common::error( qq(BAD locator: $line) ); - } - - $record = { - S_ID => $s_id, - S_BEG => $s_beg - 1, - S_END => $s_end - 1, - Q_ID => $type, - STRAND => $strand, - }; - - Maasha::Biopieces::put_record( $record, $out ); - } - } - else - { - Maasha::Common::error( qq(BAD feature: $line) ); - } - } - else - { - Maasha::Common::error( qq(BAD definition: $line) ); - } -} - -close $fh_in; - -$/ = "\n"; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +tmpdir = Biopieces.mktmpdir +infile = File.join(tmpdir, "in.fna") +outfile = File.join(tmpdir, "out.prodigal") +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + Fasta.open(infile, "w") do |fasta_io| + input.each_record do |record| + output.puts record -BEGIN -{ - Maasha::Biopieces::status_set(); -} + if record[:SEQ_NAME] and record[:SEQ] + seq = Seq.new_bp(record) + fasta_io.puts seq.to_fasta + end + end + end + prodigal = Prodigal.new(infile, outfile, options) + prodigal.run -END -{ - Maasha::Biopieces::status_log(); -} + prodigal.each do |record| + output.puts record + end +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<