From 8a4293acc98653da604dcadf9df83e82aad1c092 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 22 Jul 2008 01:25:07 +0000 Subject: [PATCH] added record2fasta to allow for aliasing of SEQ_NAME to Q_ID or S_ID git-svn-id: http://biopieces.googlecode.com/svn/trunk@187 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/Biopieces.pm | 92 ++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm index d375e2d..554ac36 100644 --- a/code_perl/Maasha/Biopieces.pm +++ b/code_perl/Maasha/Biopieces.pm @@ -2037,7 +2037,7 @@ sub script_format_genome # Returns nothing. - my ( $dir, $genome, $fasta_dir, $phastcons_dir, $vals, $fh_out, $record, $format, $index ); + my ( $dir, $genome, $fasta_dir, $phastcons_dir, $vals, $fh_out, $record, $format, $index, $entry ); $dir = $options->{ 'dir' } || $ENV{ 'BP_DATA' }; $genome = $options->{ 'genome' }; @@ -2072,9 +2072,9 @@ sub script_format_genome while ( $record = get_record( $in ) ) { - if ( $fh_out and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) + if ( $fh_out and $entry = record2fasta( $record ) ) { - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out, $options->{ "wrap" } ); + Maasha::Fasta::put_entry( $entry, $fh_out, $options->{ "wrap" } ); } elsif ( $fh_out and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } ) { @@ -3296,8 +3296,6 @@ sub script_patscan_seq $i++; } - -# put_record( $record, $out ); } close $fh_out; @@ -3367,7 +3365,7 @@ sub script_create_blast_db # Returns nothing. 
- my ( $fh, $seq_type, $path, $record ); + my ( $fh, $seq_type, $path, $record, $entry ); $path = $options->{ "database" }; @@ -3377,11 +3375,11 @@ sub script_create_blast_db { put_record( $record, $out ) if not $options->{ "no_stream" }; - if ( $record->{ "SEQ" } and $record->{ "SEQ_NAME" } ) + if ( $entry = record2fasta( $record ) ) { - $seq_type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $seq_type; + $seq_type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not $seq_type; - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh ); + Maasha::Fasta::put_entry( $entry, $fh ); } } @@ -3410,7 +3408,7 @@ sub script_blast_seq # Returns nothing. - my ( $genome, $q_type, $s_type, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields ); + my ( $genome, $q_type, $s_type, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields, $entry ); $options->{ "e_val" } = 10 if not defined $options->{ "e_val" }; $options->{ "filter" } = "F"; @@ -3426,11 +3424,11 @@ sub script_blast_seq while ( $record = get_record( $in ) ) { - if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) + if ( $entry = record2fasta( $record ) ) { - $q_type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $q_type; + $q_type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not $q_type; - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out ); + Maasha::Fasta::put_entry( $entry, $fh_out ); } put_record( $record, $out ); @@ -3520,7 +3518,7 @@ sub script_blat_seq # Returns nothing. 
- my ( $blat_args, $genome_file, $query_file, $fh_in, $fh_out, $type, $record, $result_file, $entries ); + my ( $blat_args, $genome_file, $query_file, $fh_in, $fh_out, $type, $record, $result_file, $entries, $entry ); $genome_file = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/fasta/$options->{ 'genome' }.fna"; @@ -3543,10 +3541,10 @@ sub script_blat_seq while ( $record = get_record( $in ) ) { - if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) + if ( $entry = record2fasta( $record ) ) { - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out, 80 ); - $type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type; + $type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not $type; + Maasha::Fasta::put_entry( $entry, $fh_out, 80 ); } put_record( $record, $out ); @@ -3584,7 +3582,7 @@ sub script_soap_seq # Returns nothing. - my ( $genome, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields ); + my ( $genome, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields, $entry ); $options->{ "mismatches" } ||= 2; $options->{ "gap_size" } ||= 0; @@ -3599,8 +3597,8 @@ sub script_soap_seq while ( $record = get_record( $in ) ) { - if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) { - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out ); + if ( $entry = record2fasta( $record ) ) { + Maasha::Fasta::put_entry( $entry, $fh_out ); } put_record( $record, $out ); @@ -3694,7 +3692,7 @@ sub script_create_vmatch_index # Returns nothing. 
- my ( $record, $file_tmp, $fh_tmp, $type ); + my ( $record, $file_tmp, $fh_tmp, $type, $entry ); if ( $options->{ "index_name" } ) { @@ -3704,11 +3702,11 @@ sub script_create_vmatch_index while ( $record = get_record( $in ) ) { - if ( $options->{ "index_name" } and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) + if ( $options->{ "index_name" } and $entry = record2fasta( $record ) ) { - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_tmp ); + Maasha::Fasta::put_entry( $entry, $fh_tmp ); - $type = Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type; + $type = Maasha::Seq::seq_guess_type( $entry->[ SEQ ] ) if not defined $type; } put_record( $record, $out ) if not $options->{ "no_stream" }; @@ -3795,14 +3793,14 @@ sub script_write_fasta # Returns nothing. - my ( $record, $fh ); + my ( $record, $fh, $entry ); $fh = write_stream( $options->{ "data_out" }, $options->{ "compress" } ); while ( $record = get_record( $in ) ) { - if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) { - Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh, $options->{ "wrap" } ); + if ( $entry = record2fasta( $record ) ) { + Maasha::Fasta::put_entry( $entry, $fh, $options->{ "wrap" } ); } put_record( $record, $out ) if not $options->{ "no_stream" }; @@ -4142,7 +4140,7 @@ sub script_write_2bit # Returns nothing. 
-    my ( $record, $mask, $tmp_file, $fh_tmp, $fh_in, $fh_out );
+    my ( $record, $mask, $tmp_file, $fh_tmp, $fh_in, $fh_out, $entry );
 
     $mask = 1 if not $options->{ "no_mask" };
 
@@ -4153,8 +4151,8 @@ sub script_write_2bit
 
     while ( $record = get_record( $in ) )
     {
-        if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) {
-            Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_tmp );
+        if ( $entry = record2fasta( $record ) ) {
+            Maasha::Fasta::put_entry( $entry, $fh_tmp );
         }
 
         put_record( $record, $out ) if not $options->{ "no_stream" };
@@ -4186,17 +4184,17 @@ sub script_write_solid
 
     # Returns nothing.
 
-    my ( $record, $fh, $seq_cs );
+    my ( $record, $fh, $entry );
 
     $fh = write_stream( $options->{ "data_out" }, $options->{ "compress" } );
 
     while ( $record = get_record( $in ) )
     {
-        if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        if ( $entry = record2fasta( $record ) )
         {
-            $seq_cs = Maasha::Solid::seq2color_space( uc $record->{ "SEQ" } );
+            $entry->[ SEQ ] = Maasha::Solid::seq2color_space( uc $entry->[ SEQ ] );
 
-            Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $seq_cs ], $fh, $options->{ "wrap" } );
+            Maasha::Fasta::put_entry( $entry, $fh, $options->{ "wrap" } );
         }
 
         put_record( $record, $out ) if not $options->{ "no_stream" };
@@ -5855,6 +5853,32 @@ sub script_upload_to_ucsc
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
+sub record2fasta
+{
+    # Martin A. Hansen, July 2008.
+
+    # Given a biopiece record converts it to a FASTA entry. The name is the first
+    # defined, non-empty value among SEQ_NAME, Q_ID, S_ID and the sequence the first
+    # such value among SEQ, Q_SEQ, S_SEQ, so a name of "0" is kept and "" is skipped.
+
+    my ( $record,    # record
+       ) = @_;
+
+    # Returns ( name, seq ) in list context, [ name, seq ] in scalar context, or nothing.
+
+    my ( $seq_name, $seq );
+
+    ( $seq_name ) = grep { defined $_ and length $_ } map { $record->{ $_ } } qw( SEQ_NAME Q_ID S_ID );
+    ( $seq )      = grep { defined $_ and length $_ } map { $record->{ $_ } } qw( SEQ Q_SEQ S_SEQ );
+
+    if ( defined $seq_name and defined $seq ) {
+        return wantarray ? ( $seq_name, $seq ) : [ $seq_name, $seq ];
+    } else {
+        return;
+    }
+}
+
+
 sub read_stream
 {
     # Martin A. Hansen, July 2007.
-- 
2.39.5