From: martinahansen Date: Mon, 16 Nov 2009 10:47:11 +0000 (+0000) Subject: hacked read_soft to deal with soft_files wo platform table X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=85caf0ad7444710ef3f8c624b17bc2bbb2e377b2;p=biopieces.git hacked read_soft to deal with soft_files wo platform table git-svn-id: http://biopieces.googlecode.com/svn/trunk@762 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/read_soft b/bp_bin/read_soft index 78adc84..10ae8c1 100755 --- a/bp_bin/read_soft +++ b/bp_bin/read_soft @@ -29,6 +29,7 @@ use warnings; use strict; +use Data::Dumper; use Maasha::Biopieces; use Maasha::Filesys; use Maasha::NCBI; @@ -63,6 +64,8 @@ foreach $file ( @{ $options->{ "data_in" } } ) $soft_index = Maasha::NCBI::soft_index_file( $file ); + # print STDERR Dumper( $soft_index ) if $options->{ "verbose" }; + $fh = Maasha::Filesys::file_read_open( $file ); @platforms = grep { $_->{ "SECTION" } =~ /PLATFORM/ } @{ $soft_index }; diff --git a/code_perl/Maasha/NCBI.pm b/code_perl/Maasha/NCBI.pm index baafb69..331dea3 100644 --- a/code_perl/Maasha/NCBI.pm +++ b/code_perl/Maasha/NCBI.pm @@ -109,134 +109,26 @@ sub get_seq } -sub soft_parse -{ - # Martin A. Hansen, February 2008. - - # !!! NOT USED ANYMORE !!! # - - # Reads in and parses a file in SOFT format. - - my ( $path, # full path to SOFT file - ) = @_; - - # Returns a list. - - my ( $fh, @lines, $i, $c, $num, %key_hash, @fields, %id_hash, $id, $seq, $count, $record, @records, $platform_id, $sample_id, $sample_title ); - - $fh = Maasha::Common::read_open( $path ); - - @lines = <$fh>; - - close $fh; - - chomp @lines; - - $i = 0; - - $num = 1; - - while ( $i < @lines ) - { - if ( $lines[ $i ] =~ /^\^PLATFORM = (.+)/ ) - { - $platform_id = $1; - } - elsif ( $lines[ $i ] =~ /^!platform_table_begin$/ ) - { - @fields = split "\t", $lines[ $i + 1 ]; - - for ( $c = 0; $c < @fields; $c++ ) { - $key_hash{ $fields[ $c ] } = $c; - } - - $c = $i + 2; - - while ( $lines[ $c ] !~ /^!platform_table_end$/ ) - { - @fields = split "\t", $lines[ $c ]; - - $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ]; - - $c++; - } - - $i = $c; - } - elsif ( $lines[ $i ] =~ /^\^SAMPLE = (.+)/ ) - { - $sample_id = $1; - } - elsif ( $lines[ $i ] =~ /^!Sample_title = (.+)/ ) - { - $sample_title = $1; - } - elsif ( $lines[ $i ] =~ /^!sample_table_begin/ ) - { - undef %key_hash; - - @fields = split "\t", $lines[ $i + 1 ]; - - for ( $c = 0; $c < @fields; $c++ ) { - $key_hash{ $fields[ $c ] } = $c; - } - - $c = $i + 2; - - while ( $lines[ $c ] !~ /^!sample_table_end$/ ) - { - undef $record; - - @fields = split "\t", $lines[ $c ]; - - $id = $fields[ $key_hash{ "ID_REF" } ]; - $seq = $id_hash{ $id }; - $count = $fields[ $key_hash{ "VALUE" } ]; - - $seq =~ tr/./N/; - - $record->{ "SAMPLE_TITLE" } = $sample_title; - $record->{ "SEQ" } = $seq; - $record->{ "SEQ_NAME" } = join( "_", $platform_id, $sample_id, $num, $count ); - - push @records, $record; - - $c++; - $num++; - } - - $i = $c; - - $num = 1; - } - - $i++; - } - - return wantarray ? @records : \@records; -} - - sub soft_index_file { # Martin A. Hansen, June 2008. - # Create a index with linenumbers of the different tables + # Create a index with line numbers of the different tables # in a soft file. The index is returned. my ( $file, # file to index ) = @_; - # Returns + # Returns a list my ( $fh, $line, $i, $c, @index, $first ); - $fh = Maasha::Common::read_open( $file ); + $fh = Maasha::Filesys::file_read_open( $file ); $first = 1; - $i = 0; - $c = 0; + $i = 0; + $c = 0; while ( $line = <$fh> ) { @@ -400,7 +292,11 @@ sub soft_get_sample @fields = split "\t", $lines[ $i + 1 ]; - for ( $c = 0; $c < @fields; $c++ ) { + for ( $c = 0; $c < @fields; $c++ ) + { + $fields[ $c ] = "ID_REF" if $fields[ $c ] eq "SEQUENCE"; + $fields[ $c ] = "VALUE" if $fields[ $c ] eq "COUNT"; + $key_hash{ $fields[ $c ] } = $c; } @@ -411,9 +307,8 @@ sub soft_get_sample undef $record; @fields = split "\t", $lines[ $c ]; - $id = $fields[ $key_hash{ "ID_REF" } ]; - $seq = $plat_table->{ $id }; + $seq = $plat_table->{ $id } || $id; $count = $fields[ $key_hash{ "VALUE" } ]; $seq =~ tr/./N/;