# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use Data::Dumper;
use LWP::Simple;
use Maasha::Common;
+use Maasha::Filesys;
+use Maasha::Seq;
use vars qw( @ISA @EXPORT );
}
-sub soft_parse
-{
- # Martin A. Hansen, February 2008.
-
- # !!! NOT USED ANYMORE !!! #
-
- # Reads a whole file in SOFT format into memory and parses it: platform table rows give an ID -> SEQUENCE lookup, and every sample table row becomes one record.
-
- my ( $path, # full path to SOFT file
- ) = @_;
-
- # Returns a list of record hashrefs in list context, or a reference to that list in scalar context.
-
- my ( $fh, @lines, $i, $c, $num, %key_hash, @fields, %id_hash, $id, $seq, $count, $record, @records, $platform_id, $sample_id, $sample_title );
-
- $fh = &Maasha::Common::read_open( $path );
-
- @lines = <$fh>; # slurp every line of the file
-
- close $fh;
-
- chomp @lines;
-
- $i = 0; # index of the line currently being inspected
-
- $num = 1; # running row number within the current sample table
-
- while ( $i < @lines )
- {
- if ( $lines[ $i ] =~ /^\^PLATFORM = (.+)/ )
- {
- $platform_id = $1;
- }
- elsif ( $lines[ $i ] =~ /^!platform_table_begin$/ )
- {
- @fields = split "\t", $lines[ $i + 1 ]; # the line after the begin marker is taken as the column header
-
- for ( $c = 0; $c < @fields; $c++ ) {
- $key_hash{ $fields[ $c ] } = $c; # column name -> column index
- }
-
- $c = $i + 2; # first data row
-
- while ( $lines[ $c ] !~ /^!platform_table_end$/ ) # NOTE(review): $c is not bounded by @lines, so a missing end marker is never detected
- {
- @fields = split "\t", $lines[ $c ];
-
- $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
-
- $c++;
- }
-
- $i = $c; # continue after the table; $c now points at the end marker
- }
- elsif ( $lines[ $i ] =~ /^\^SAMPLE = (.+)/ )
- {
- $sample_id = $1;
- }
- elsif ( $lines[ $i ] =~ /^!Sample_title = (.+)/ )
- {
- $sample_title = $1;
- }
- elsif ( $lines[ $i ] =~ /^!sample_table_begin/ )
- {
- undef %key_hash; # drop the column map of the previously parsed table
-
- @fields = split "\t", $lines[ $i + 1 ]; # the line after the begin marker is taken as the column header
-
- for ( $c = 0; $c < @fields; $c++ ) {
- $key_hash{ $fields[ $c ] } = $c; # column name -> column index
- }
-
- $c = $i + 2; # first data row
-
- while ( $lines[ $c ] !~ /^!sample_table_end$/ ) # NOTE(review): $c is not bounded by @lines, so a missing end marker is never detected
- {
- undef $record; # start a fresh hashref for this row
-
- @fields = split "\t", $lines[ $c ];
-
- $id = $fields[ $key_hash{ "ID_REF" } ];
- $seq = $id_hash{ $id }; # sequence stored for this ID while parsing the platform table
- $count = $fields[ $key_hash{ "VALUE" } ];
-
- $seq =~ tr/./N/; # replace every '.' in the sequence with 'N'
-
- $record->{ "SAMPLE_TITLE" } = $sample_title;
- $record->{ "SEQ" } = $seq;
- $record->{ "SEQ_NAME" } = join( "_", $platform_id, $sample_id, $num, $count ); # platform id, sample id, row number and count joined with "_"
-
- push @records, $record;
-
- $c++;
- $num++;
- }
-
- $i = $c; # continue after the table; $c now points at the end marker
-
- $num = 1; # row numbering restarts for the next sample table
- }
-
- $i++;
- }
-
- return wantarray ? @records : \@records;
-}
-
-
sub soft_index_file
{
# Martin A. Hansen, June 2008.
- # Create a index with linenumbers of the different tables
+ # Create an index with line numbers of the different tables
# in a soft file. The index is returned.
my ( $file, # file to index
) = @_;
- # Returns
+ # Returns a list
my ( $fh, $line, $i, $c, @index, $first );
- $fh = &Maasha::Common::read_open( $file );
+ $fh = Maasha::Filesys::file_read_open( $file );
$first = 1;
- $i = 0;
- $c = 0;
+ $i = 0;
+ $c = 0;
while ( $line = <$fh> )
{
if ( $line =~ /^\^/ )
{
- push @index, [ $line, $i ];
+ push @index, {
+ SECTION => $line,
+ LINE_BEG => $i,
+ };
if ( not $first )
{
- push @{ $index[ $c - 1 ] }, $i - 1;
+ $index[ $c - 1 ]{ "LINE_END" } = $i - 1;
}
else
{
$i++;
}
- push @{ $index[ $c - 1 ] }, $i - 1;
+ $index[ $c - 1 ]{ "LINE_END" } = $i - 1;
close $fh;
{
# Martin A. Hansen, June 2008.
- # Given a filehandle to a SOFT file parses the platform table
+ # Given a filehandle to a SOFT file parses all the platform tables
# which is returned.
my ( $fh, # filehandle
{
@fields = split "\t", $lines[ $c ];
- $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
-
+ if ( defined $key_hash{ "SEQUENCE" } ) {
+ $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
+ } else {
+ $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "ID" } ];
+ }
+
$c++;
}
{
# Martin A. Hansen, June 2008.
- # Given a filehandle to a SOFT file parses the platform table
- # which is returned.
+ # Given a filehandle to a SOFT file and a platform table
+ # parses sample records which are returned.
- my ( $fh, # filehandle
- $plat_table, # hashref with platform tables
- $beg, # line number where sample table begin
- $end, # line number where sample table end
+ my ( $fh, # filehandle
+ $plat_table, # hashref with platform table
+ $beg, # line number where sample table begin
+ $end, # line number where sample table end
+ $skip, # flag indicating that this sample table should not be parsed.
) = @_;
- # Returns hashref
+ # Returns a list of hashrefs
my ( $line, @lines, $i, $c, $platform_id, @fields, %key_hash, $num, $sample_id, $sample_title, $id, $seq, $count, @records, $record );
while ( $line = <$fh> )
{
- chomp $line;
+ if ( not $skip )
+ {
+ chomp $line;
+
+ push @lines, $line if $i >= $beg;
+ }
- push @lines, $line if $i >= $beg;
-
last if $i == $end;
$i++;
}
+ if ( $skip ) {
+ return wantarray ? () : [];
+ }
+
$i = 0;
$num = 1;
@fields = split "\t", $lines[ $i + 1 ];
- for ( $c = 0; $c < @fields; $c++ ) {
+ for ( $c = 0; $c < @fields; $c++ )
+ {
+ $fields[ $c ] = "ID_REF" if $fields[ $c ] eq "SEQUENCE";
+ $fields[ $c ] = "VALUE" if $fields[ $c ] eq "COUNT";
+
$key_hash{ $fields[ $c ] } = $c;
}
undef $record;
@fields = split "\t", $lines[ $c ];
-
$id = $fields[ $key_hash{ "ID_REF" } ];
- $seq = $plat_table->{ $id };
+ $seq = $plat_table->{ $id } || $id;
$count = $fields[ $key_hash{ "VALUE" } ];
$seq =~ tr/./N/;
}
+sub blast_index
+{
+    # Martin A. Hansen, July 2008.
+
+    # Create a BLAST index of a given FASTA file using formatdb
+    # and move the resulting index files to a specified directory.
+    # The index name is passed to formatdb as the database title (-t);
+    # no base name is given, so the index files are named after the
+    # FASTA file and are written next to it before being moved.
+
+    my ( $file,         # FASTA file name, relative to $src_dir
+         $src_dir,      # source directory with FASTA file
+         $dst_dir,      # destination directory for BLAST index
+         $index_type,   # protein or nucleotide
+         $index_name,   # name of index (used as formatdb title)
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $type );
+
+    # Value for formatdb -p: "T" when the type matches /protein/i,
+    # otherwise "F" (anything else is treated as nucleotide).
+    if ( $index_type =~ /protein/i ) {
+        $type = "T";
+    } else {
+        $type = "F";
+    }
+
+    Maasha::Filesys::dir_create_if_not_exists( $dst_dir );
+    Maasha::Common::run( "formatdb", "-p $type -i $src_dir/$file -t $index_name -l /dev/null" );
+
+    # Move only the files belonging to the index just built. The former
+    # globs ($src_dir/*hr etc.) also matched every other file in the
+    # source directory with these endings, and matched nothing when $file
+    # contained a subdirectory. The ".*" part covers the p/n type letter
+    # as well as multi-volume names such as "$file.00.phr".
+    # NOTE(review): relies on Maasha::Common::run expanding the glob,
+    # presumably via a shell - paths are interpolated unquoted, confirm
+    # that callers never pass names containing whitespace.
+    foreach my $suffix ( qw( hr in sq ) ) {
+        Maasha::Common::run( "mv", "$src_dir/$file.*$suffix $dst_dir" );
+    }
+}
+
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<