From 87f5a21b2ac32cf899e6a8efcc77ee6907398d37 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 22 Aug 2008 04:47:44 +0000 Subject: [PATCH] fixed read_soft to deal with stupid Soft format feature git-svn-id: http://biopieces.googlecode.com/svn/trunk@219 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/Biopieces.pm | 27 ++++++++++++-------- code_perl/Maasha/NCBI.pm | 47 +++++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index 11c8020..ce308a3 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -1015,7 +1015,7 @@ sub get_options Maasha::Common::error( qq(Genome $options{ $opt } not found in "$ENV{ 'BP_DATA' }/genomes/") ); } } - elsif ( $opt eq "terminal" and not $options{ $opt } =~ /^(svg|post|dumb)/ ) + elsif ( $opt eq "terminal" and not $options{ $opt } =~ /^(svg|post|dumb|x11)/ ) { Maasha::Common::error( qq(Bad --$opt argument "$options{ $opt }") ); } @@ -1736,7 +1736,7 @@ sub script_read_soft # Returns nothing. - my ( $data_in, $file, $num, $records, $record, $soft_index, $fh, @platforms, $plat_table, @samples, $sample, $old_end ); + my ( $data_in, $file, $num, $records, $record, $soft_index, $fh, @platforms, $plat_table, @samples, $sample, $old_end, $skip ); while ( $record = get_record( $in ) ) { put_record( $record, $out ); @@ -1746,25 +1746,30 @@ sub script_read_soft foreach $file ( @{ $options->{ "files" } } ) { + print STDERR "Creating index for file: $file\n" if $options->{ "verbose" }; + $soft_index = Maasha::NCBI::soft_index_file( $file ); $fh = Maasha::Common::read_open( $file ); - @platforms = grep { $_->[ 0 ] =~ /PLATFORM/ } @{ $soft_index }; + @platforms = grep { $_->{ "SECTION" } =~ /PLATFORM/ } @{ $soft_index }; + + print STDERR "Getting platform tables for file: $file\n" if $options->{ "verbose" }; - $plat_table = Maasha::NCBI::soft_get_platform( $fh, $platforms[ 0 ]->[ 1 ], $platforms[ -1 ]->[ 2 ] ); + $plat_table = Maasha::NCBI::soft_get_platform( $fh, $platforms[ 0 ]->{ "LINE_BEG" }, $platforms[ -1 ]->{ "LINE_END" } ); - @samples = grep { $_->[ 0 ] =~ /SAMPLE/ } @{ $soft_index }; + @samples = grep { $_->{ "SECTION" } =~ /SAMPLE/ } @{ $soft_index }; - $old_end = $platforms[ -1 ]->[ 2 ]; + $old_end = $platforms[ -1 ]->{ "LINE_END" }; foreach $sample ( @samples ) { - if ( $options->{ "samples" } ) { - next if grep { $sample->[ 0 ] !~ $_ } @{ $options->{ "samples" } }; - } + $skip = 0; + $skip = 1 if ( $options->{ "samples" } and grep { $sample->{ "SECTION" } !~ /$_/ } @{ $options->{ "samples" } } ); + + print STDERR "Getting samples for dataset: $sample->{ 'SECTION' }\n" if $options->{ "verbose" } and not $skip; - $records = Maasha::NCBI::soft_get_sample( $fh, $plat_table, $sample->[ 1 ] - $old_end - 1, $sample->[ 2 ] - $old_end - 1 ); + $records = Maasha::NCBI::soft_get_sample( $fh, $plat_table, $sample->{ "LINE_BEG" } - $old_end - 1, $sample->{ "LINE_END" } - $old_end - 1, $skip ); foreach $record ( @{ $records } ) { @@ -1775,7 +1780,7 @@ sub script_read_soft $num++; } - $old_end = $sample->[ 2 ]; + $old_end = $sample->{ "LINE_END" }; } close $fh; diff --git a/code_perl/Maasha/NCBI.pm b/code_perl/Maasha/NCBI.pm index e6b7407..a94dc4a 100644 --- a/code_perl/Maasha/NCBI.pm +++ b/code_perl/Maasha/NCBI.pm @@ -155,7 +155,7 @@ sub soft_parse @fields = split "\t", $lines[ $c ]; $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ]; - + $c++; } @@ -233,8 +233,8 @@ sub soft_index_file $first = 1; - $i = 0; - $c = 0; + $i = 0; + $c = 0; while ( $line = <$fh> ) { @@ -242,11 +242,14 @@ sub soft_index_file if ( $line =~ /^\^/ ) { - push @index, [ $line, $i ]; + push @index, { + SECTION => $line, + LINE_BEG => $i, + }; if ( not $first ) { - push @{ $index[ $c - 1 ] }, $i - 1; + $index[ $c - 1 ]{ "LINE_END" } = $i - 1; } else { @@ -259,7 +262,7 @@ sub soft_index_file $i++; } - push @{ $index[ $c - 1 ] }, $i - 1; + $index[ $c - 1 ]{ "LINE_END" } = $i - 1; close $fh; @@ -271,7 +274,7 @@ sub soft_get_platform { # Martin A. Hansen, June 2008. - # Given a filehandle to a SOFT file parses the platform table + # Given a filehandle to a SOFT file parses all the platform tables # which is returned. my ( $fh, # filehandle @@ -314,7 +317,7 @@ sub soft_get_platform { @fields = split "\t", $lines[ $c ]; - $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ]; + $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ] || $fields[ $key_hash{ "ID" } ]; $c++; } @@ -333,16 +336,17 @@ sub soft_get_sample { # Martin A. Hansen, June 2008. - # Given a filehandle to a SOFT file parses the platform table - # which is returned. + # Given a filehandle to a SOFT file and a platform table + # parses sample records which are returned. - my ( $fh, # filehandle - $plat_table, # hashref with platform tables - $beg, # line number where sample table begin - $end, # line number where sample table end + my ( $fh, # filehandle + $plat_table, # hashref with platform table + $beg, # line number where sample table begin + $end, # line number where sample table end + $skip, # flag indicating that this sample table should not be parsed. ) = @_; - # Returns hashref + # Returns list of hashref my ( $line, @lines, $i, $c, $platform_id, @fields, %key_hash, $num, $sample_id, $sample_title, $id, $seq, $count, @records, $record ); @@ -350,15 +354,22 @@ sub soft_get_sample while ( $line = <$fh> ) { - chomp $line; + if ( not $skip ) + { + chomp $line; + + push @lines, $line if $i >= $beg; + } - push @lines, $line if $i >= $beg; - last if $i == $end; $i++; } + if ( $skip ) { + return wantarray ? () : []; + } + $i = 0; $num = 1; -- 2.39.5