+++ /dev/null
-package Maasha::BioRun;
-
-
-# Copyright (C) 2007-2009 Martin A. Hansen.
-
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# http://www.gnu.org/copyleft/gpl.html
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-# Routines that contain the Biopieces which are run.
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-use strict;
-use Data::Dumper;
-use Getopt::Long qw( :config bundling );
-use Time::HiRes qw( gettimeofday );
-use Storable qw( dclone );
-use Maasha::Biopieces;
-use Maasha::Config;
-use Maasha::Common;
-use Maasha::Filesys;
-use Maasha::Fasta;
-use Maasha::EMBL;
-use Maasha::Seq;
-use Maasha::Calc;
-use Maasha::UCSC;
-use Maasha::UCSC::BED;
-use Maasha::UCSC::Wiggle;
-use Maasha::NCBI;
-use Maasha::GFF;
-use Maasha::TwoBit;
-use Maasha::Solid;
-use Maasha::Solexa;
-use Maasha::SQL;
-use Maasha::Gwiki;
-
-use vars qw( @ISA @EXPORT_OK );
-
-require Exporter;
-
-@ISA = qw( Exporter );
-
-use constant {
- SEQ_NAME => 0,
- SEQ => 1,
-};
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> GLOBALS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-# File-scoped values, fetched once when the module is loaded.
-my $script = Maasha::Common::get_scriptname();
-my $BP_TMP = Maasha::Common::get_tmpdir();
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RUN SCRIPT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-# Loading this module immediately runs the script whose name was fetched above.
-run_script( $script );
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub run_script
-{
-    # Martin A. Hansen, August 2007.
-
-    # Runs a specific script: logs the invocation, parses the command line,
-    # opens the in and out streams, dispatches to the matching script_*
-    # routine, closes the streams and, if --verbose is given, prints the
-    # elapsed time to STDERR.
-
-    my ( $script,   # script name
-       ) = @_;
-
-    # Returns nothing.
-
-    # Script name => routine implementing it. Names not listed here are
-    # silently skipped, exactly as an unmatched if/elsif chain would be.
-    my %handler_for = (
-        "print_usage"    => \&script_print_usage,
-        "complexity_seq" => \&script_complexity_seq,
-    );
-
-    Maasha::Biopieces::log_biopiece();
-
-    my $t0 = gettimeofday();
-
-    my $options = get_options( $script );
-
-    $options->{ "SCRIPT" } = $script;
-
-    # SCRIPT was just added, so at most one key means no option was given.
-    if ( ( -t STDIN and keys %{ $options } <= 1 ) or $options->{ 'help' } ) {
-        $script = "print_usage";
-    }
-
-    my $in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-    my $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
-
-    my $handler = $handler_for{ $script };
-
-    $handler->( $in, $out, $options ) if defined $handler;
-
-    close $in if defined $in;
-    close $out;
-
-    my $t1 = gettimeofday();
-
-    if ( $options->{ 'verbose' } )
-    {
-        # Pad the script name to 25 columns; a negative repeat count yields "".
-        my $padding = " " x ( 25 - length( $script ) );
-
-        print STDERR "Program: $script" . $padding . sprintf( "Run time: %.4f\n", ( $t1 - $t0 ) );
-    }
-}
-
-
-sub get_options
-{
-    # Martin A. Hansen, February 2008.
-
-    # Gets options from the command line and checks these vigorously.
-    # Options holding comma separated lists are split into array references,
-    # and every invalid value is reported through Maasha::Common::error().
-
-    my ( $script,   # name of script
-       ) = @_;
-
-    # Returns hash in list context, otherwise a hash reference.
-
-    my ( %options, @options, @genomes );
-
-    if ( $script eq "print_usage" )
-    {
-        @options = qw(
-            data_in|i=s
-        );
-    }
-
-    push @options, qw(
-        stream_in|I=s
-        stream_out|O=s
-        verbose|v
-        help|?
-    );
-
-    # NOTE(review): the return value of GetOptions is not inspected here, so
-    # a failed parse does not by itself stop the script - confirm intended.
-    GetOptions(
-        \%options,
-        @options,
-    );
-
-    # Nothing to validate when run interactively without options or with --help.
-    if ( -t STDIN && scalar( keys %options ) == 0 or $options{ "help" } ) {
-        return wantarray ? %options : \%options;
-    }
-
-    # Turn comma separated values into array references.
-    foreach my $list_opt ( qw( cols keys no_keys save_keys quals feats frames samples tables tracks ) )
-    {
-        $options{ $list_opt } = [ split ",", $options{ $list_opt } ] if defined $options{ $list_opt };
-    }
-
-    # ---- check arguments ----
-
-    if ( $options{ 'data_in' } )
-    {
-        $options{ "files" } = Maasha::Biopieces::getopt_files( $options{ 'data_in' } );
-
-        Maasha::Common::error( qq(Argument to --data_in must be a valid file or fileglob expression) ) if scalar @{ $options{ "files" } } == 0;
-    }
-
-    if ( $options{ "cols" } )
-    {
-        foreach my $col ( @{ $options{ "cols" } } )
-        {
-            Maasha::Common::error( qq(Argument to --cols must be a whole numbers - not "$col") ) if $col !~ /^\d+$/;
-        }
-    }
-
-    # Option names whose values must be non-negative whole numbers.
-    # Compiled once, outside the loop. The match on the option name is
-    # deliberately left unanchored, as before.
-    my $real = qr/beg|end|word_size|wrap|len|prefix_length|mismatches|offset|num|skip|cpus|window_size|step_size/;
-
-    foreach my $opt ( keys %options )
-    {
-        if ( $opt =~ /stream_in|pattern_in|exact_in/ and not -f $options{ $opt } )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be a valid file or fileglob expression - not "$options{ $opt }") );
-        }
-        elsif ( $opt =~ $real and $options{ $opt } !~ /^\d+$/ )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be a whole number - not "$options{ $opt }") );
-        }
-        elsif ( $opt =~ /max_hits|max_misses|dist|edit_dist|flank|gap|hamming_dist|priority/ and $options{ $opt } !~ /^-?\d+$/ )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be an integer - not "$options{ $opt }") );
-        }
-        elsif ( $opt =~ /identity|threshold/ and $options{ $opt } !~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/ )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be a decimal number - not "$options{ $opt }") );
-        }
-        elsif ( $opt =~ /e_val/ and $options{ $opt } !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/ )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be a float - not "$options{ $opt }") );
-        }
-        elsif ( $opt =~ /strand/ and $options{ $opt } !~ /^(\+|-)$/ )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be "+" or "-" - not "$options{ $opt }") );
-        }
-        elsif ( $opt eq "genome" )
-        {
-            @genomes = Maasha::Common::ls_dirs( "$ENV{ 'BP_DATA' }/genomes" );
-
-            # Keep only the last path component of each directory.
-            s/.*\/(.+)$/$1/ foreach @genomes;
-
-            # Compare literally: the name comes straight from the command
-            # line, and using it as a regex would let values such as "hg.*"
-            # pass as a valid genome.
-            if ( not grep { $_ eq $options{ $opt } } @genomes ) {
-                Maasha::Common::error( qq(Genome $options{ $opt } not found in "$ENV{ 'BP_DATA' }/genomes/") );
-            }
-        }
-        elsif ( $opt eq "terminal" and not $options{ $opt } =~ /^(svg|post|dumb|x11)/ )
-        {
-            Maasha::Common::error( qq(Bad --$opt argument "$options{ $opt }") );
-        }
-        elsif ( $opt eq "table" and $options{ $opt } =~ /(-|\.)/ )
-        {
-            Maasha::Common::error( qq(Character '$1' is not allowed in table name: $options{ $opt }) );
-        }
-        elsif ( $opt eq "merge" and $options{ $opt } !~ /^(AandB|AorB|BorA|AnotB|BnotA)$/ )
-        {
-            Maasha::Common::error( qq(Argument to --$opt must be AandB, AorB, BorA, AnotB, or BnotA - not "$options{ $opt }") );
-        }
-    }
-
-    return wantarray ? %options : \%options;
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SCRIPTS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub script_print_usage
-{
-    # Martin A. Hansen, January 2008.
-
-    # Reads the usage text for a script from a wiki file, converts it to
-    # ASCII lines, prints those to STDERR and exits. The file is the one
-    # given with --data_in, else $BP_DIR/bp_usage/<SCRIPT>.wiki.
-
-    my ( $in,        # handle to in stream
-         $out,       # handle to out stream
-         $options,   # options hash
-       ) = @_;
-
-    # Returns nothing.
-
-    my $file = $options->{ 'data_in' }
-             ? $options->{ 'data_in' }
-             : $ENV{ 'BP_DIR' } . "/bp_usage/" . $options->{ 'SCRIPT' } . ".wiki";
-
-    my $wiki = Maasha::Gwiki::gwiki_read( $file );
-
-    # Swap the first pair of sections with the second pair.
-    @{ $wiki }[ 2, 3, 0, 1 ] = @{ $wiki }[ 0, 1, 2, 3 ];
-
-    # Without --help only the sections matched below are kept.
-    unless ( $options->{ "help" } )
-    {
-        my @kept = grep { $_->[ 0 ]->{ 'SECTION' } =~ /Biopiece|summary|Usage|Options|Help/ } @{ $wiki };
-
-        @{ $wiki } = @kept;
-    }
-
-    my $lines = Maasha::Gwiki::gwiki2ascii( $wiki );
-
-    foreach my $line ( @{ $lines } ) {
-        print STDERR "$line\n";
-    }
-
-    exit;
-}
-
-
-sub script_complexity_seq
-{
-    # Martin A. Hansen, May 2008.
-
-    # Adds a SEQ_COMPLEXITY key to every record in the stream that has a
-    # true SEQ value. The value comes from Maasha::Seq::seq_complexity()
-    # and is formatted with two decimals. All records, with or without
-    # SEQ, are passed on to the out stream.
-
-    my ( $in,    # handle to in stream
-         $out,   # handle to out stream
-       ) = @_;
-
-    # Returns nothing.
-
-    while ( my $record = Maasha::Biopieces::get_record( $in ) )
-    {
-        if ( $record->{ "SEQ" } ) {
-            $record->{ "SEQ_COMPLEXITY" } = sprintf( "%.2f", Maasha::Seq::seq_complexity( $record->{ "SEQ" } ) );
-        }
-
-        Maasha::Biopieces::put_record( $record, $out );
-    }
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-1;
-
-__END__
-