From: martinahansen Date: Mon, 9 May 2011 09:01:48 +0000 (+0000) Subject: ported find_genes to ruby X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=1a1f26d36b33562f9669185d802d8ca2381397f8;p=biopieces.git ported find_genes to ruby git-svn-id: http://biopieces.googlecode.com/svn/trunk@1382 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/find_genes b/bp_bin/find_genes index 4ee726e..f8fc0e8 100755 --- a/bp_bin/find_genes +++ b/bp_bin/find_genes @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,153 +18,45 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Find genes in sequences in stream. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Data::Dumper; -use Maasha::Common; -use Maasha::Biopieces; -use Maasha::Filesys; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Find genes in prokaryotic genomic sequences in the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $record, $tmp_dir, $tmp_file, $fh_out, $fh_in, $entry, $chunk, @lines, $line, $s_id, $type, $s_beg, $s_end, $strand, @fields, $def, - @commands, $command ); - -$options = Maasha::Biopieces::parse_options( - [ - { long => 'full', short => 'f', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - ] -); - -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); - -$tmp_dir = Maasha::Biopieces::get_tmpdir(); -$tmp_file = "$tmp_dir/tmp.seq"; -$fh_out = Maasha::Filesys::file_write_open( $tmp_file ); - -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) { - Maasha::Fasta::put_entry( $entry, $fh_out ); - } - - Maasha::Biopieces::put_record( $record, $out ); -} - -close $fh_out; - -push @commands, "-c" if $options->{ 'full' }; -push @commands, "< $tmp_file > $tmp_file.out"; -push @commands, "2> /dev/null" unless $options->{ 'verbose' }; - -$command = join " ", @commands; - -Maasha::Common::run( "prodigal", $command ); - -$fh_in = Maasha::Filesys::file_read_open( "$tmp_file.out" ); - -$/ = "//\n"; +require 'biopieces' +require 'fasta' +require 'prodigal' -while ( $chunk = <$fh_in> ) -{ - chomp $chunk; +casts = [] +casts << {:long=>'full', :short=>'f', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} - @lines = split /\n/, $chunk; +bp = Biopieces.new - $line = shift @lines; - - if ( $line =~ /^DEFINITION\s+(.+)/ ) - { - $def = $1; - - if ( $def =~ /seqhdr="([^"]+)"/ ) { - $s_id = $1; - } else { - Maasha::Common::error( qq(BAD sequence header: $def) ); - } - - $line = shift @lines; - - if ( $line =~ /^FEATURES/ ) - { - foreach $line ( @lines ) - { - next if $line =~ /.+\//; - - @fields = split " ", $line; - - $type = $fields[ 0 ]; - - if ( $fields[ 1 ] =~ /complement/ ) { - $strand = "-"; - } else { - $strand = "+"; - } - - if ( $fields[ 1 ] =~ /(\d+)\.\.>?(\d+)/ ) - { - $s_beg = $1; - $s_end = $2; - } - else - { - Maasha::Common::error( qq(BAD locator: $line) ); - } - - $record = { - S_ID => $s_id, - S_BEG => $s_beg - 1, - S_END => $s_end - 1, - Q_ID => $type, - STRAND => $strand, - }; - - Maasha::Biopieces::put_record( $record, $out ); - } - } - else - { - Maasha::Common::error( qq(BAD feature: $line) ); - } - } - else - { - Maasha::Common::error( qq(BAD definition: $line) ); - } -} - -close $fh_in; - -$/ = "\n"; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +options = bp.parse(ARGV, casts) +tmpdir = bp.mktmpdir +infile = "#{tmpdir}/in.fna" +outfile = "#{tmpdir}/out.prodigal" -BEGIN -{ - Maasha::Biopieces::status_set(); -} +Fasta.open(infile, mode="w") do |fasta_io| + bp.each_record do |record| + bp.puts record + fasta_io.puts record + end +end +prodigal = Prodigal.new(infile, outfile, options) +prodigal.run -END -{ - Maasha::Biopieces::status_log(); -} +prodigal.each do |record| + bp.puts record +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<