From 3ab402b6e30d1e6d5b75d95b0892a0c56aca00e7 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Sun, 2 Aug 2009 10:58:48 +0000 Subject: [PATCH] finishing read_embl git-svn-id: http://biopieces.googlecode.com/svn/trunk@620 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/EMBL.pm | 96 +++++++++++++--------------------------- 1 file changed, 31 insertions(+), 65 deletions(-) diff --git a/code_perl/Maasha/EMBL.pm b/code_perl/Maasha/EMBL.pm index 730bda0..5134b03 100644 --- a/code_perl/Maasha/EMBL.pm +++ b/code_perl/Maasha/EMBL.pm @@ -83,7 +83,7 @@ sub parse_embl_entry # returns data structure - my ( @lines, $i, %hash, $ft, $seq ); + my ( @lines, $i, %hash, $ft, $seq, $key, $val ); @lines = split "\n", $entry; @@ -93,16 +93,21 @@ sub parse_embl_entry { if ( exists $args->{ "keys" } ) { + $args->{ "keys" }->{ "RN" } = 1 if grep { $_ =~ /^R/ } keys %{ $args->{ "keys" } }; + if ( $lines[ $i ] =~ /^(\w{2})\s+(.*)/ and exists $args->{ "keys" }->{ $1 } ) { - if ( $1 =~ /RN|RP|RG|RA|RT|RL/ ) { - add_ref( \%hash, \@lines, $i, $args->{ "keys" } ); - } elsif ( exists $hash{ $1 } and $1 eq "FT" ) { - $hash{ $1 } .= "\n" . $2; + $key = $1; + $val = $2; + + if ( $key =~ /RN|RX|RP|RG|RA|RT|RL/ ) { + add_ref( \%hash, \@lines, $i, $args->{ "keys" } ) if $key eq "RN"; + } elsif ( exists $hash{ $key } and $key eq "FT" ) { + $hash{ $key } .= "\n" . $val; } elsif ( exists $hash{ $1 } ) { - $hash{ $1 } .= " " . $2; + $hash{ $key } .= " " . $val; } else { - $hash{ $1 } = $2; + $hash{ $key } = $val; } } elsif ( $lines[ $i ] =~ /^\s+(.*)\s+\d+$/ and exists $args->{ "keys" }->{ "SEQ" } ) @@ -114,14 +119,17 @@ sub parse_embl_entry { if ( $lines[ $i ] =~ /^(\w{2})\s+(.*)/ ) { - if ( $1 eq "RN" ) { - add_ref( \%hash, \@lines, $i ); - } elsif ( exists $hash{ $1 } and $1 eq "FT" ) { - $hash{ $1 } .= "\n" . $2; - } elsif ( exists $hash{ $1 } ) { - $hash{ $1 } .= " " . $2; + $key = $1; + $val = $2; + + if ( $key =~ /RN|RX|RP|RG|RA|RT|RL/ ) { + add_ref( \%hash, \@lines, $i ) if $key eq "RN"; + } elsif ( exists $hash{ $1 } and $key eq "FT" ) { + $hash{ $key } .= "\n" . $val; + } elsif ( exists $hash{ $key } ) { + $hash{ $key } .= " " . $val; } else { - $hash{ $1 } = $2; + $hash{ $key } = $val; } } elsif ( $lines[ $i ] =~ /^\s+(.*)\s+\d+$/ ) @@ -495,6 +503,9 @@ sub embl2biopieces my ( %args, $data, $record, $key_record, $key, $feat, $feat2, $qual, $qual_val, @records, $no_seq ); + local $Data::Dumper::Terse = 1; + local $Data::Dumper::Indent = 0; + map { $args{ 'keys' }{ $_ } = 1 } @{ $options->{ 'keys' } }; map { $args{ 'feats' }{ $_ } = 1 } @{ $options->{ 'features' } }; map { $args{ 'quals' }{ $_ } = 1 } @{ $options->{ 'qualifiers' } }; @@ -514,6 +525,8 @@ sub embl2biopieces $data = parse_embl_entry( $entry, \%args ); + # print Dumper( $data ); + foreach $key ( keys %{ $data } ) { if ( $key eq 'SEQ' and $no_seq ) { @@ -521,7 +534,7 @@ sub embl2biopieces } if ( $key eq 'REF' ) { - $key_record->{ $key } = inline_ref( $data->{ $key } ); + $key_record->{ $key } = Dumper( $data->{ $key } ); } if ( $key ne 'FT' and $key ne 'REF' ) { @@ -531,28 +544,11 @@ sub embl2biopieces if ( exists $data->{ 'FT' } ) { - foreach $feat ( keys %{ $data->{ 'FT' } } ) - { - $record = {}; - $record->{ "FEATURE" } = $feat; - - foreach $feat2 ( @{ $data->{ 'FT' }->{ $feat } } ) - { - foreach $qual ( keys %{ $feat2 } ) - { - $qual_val = join "; ", @{ $feat2->{ $qual } }; - - $qual = "SEQ" if $qual eq "_seq"; - $qual = "LOCATOR" if $qual eq "_locator"; - - $record->{ $qual } = $qual_val; - } - } + $record->{ 'FT' } = Dumper( $data->{ 'FT' } ); - map { $record->{ $_ } = $key_record->{ $_ } } keys %{ $key_record }; + map { $record->{ $_ } = $key_record->{ $_ } } keys %{ $key_record }; - push @records, $record; - } + push @records, $record; } else { @@ -563,34 +559,4 @@ sub embl2biopieces } -sub inline_ref -{ - # Martin A. Hansen, August 2009. - - # Flattens a data structure with EMBL reference info to a string. - - my ( $data, # list of hashrefs - ) = @_; - - # Returns a string. - - my ( $ref, $key, $str, @keys, @refs ); - - foreach $ref ( @{ $data } ) - { - undef @keys; - - foreach $key ( qw( RN RP RG RA RT RL ) ) { - push @keys, "$key => '$ref->{ $key }'" if exists $ref->{ $key }; - } - - push @refs, "{ " . join( ", ", @keys ) . " }"; - } - - $str = '[ ' . join( ', ', @refs ) . ' ]'; - - return $str; -} - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -- 2.39.2