{
if ( $record = Maasha::Fastq::fastq2biopiece( $entry ) )
{
- lowercase_low_scores( $record, $options->{ 'quality' } );
+ Maasha::Fastq::lowercase_low_scores( $record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'quality' } );
+
+ $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml
+ $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) );
Maasha::Biopieces::put_record( $record, $out );
}
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-sub lowercase_low_scores
-{
- # Martin A. Hansen, July 2009.
-
- # Lowercase residues in sequence that are below a given score threshold.
-
- # This one would be lovely to have written in C.
-
- my ( $record, # Biopiece record
- $quality, # quality threshold
- ) = @_;
-
- # Returns nothing.
-
- my ( @seqs, @scores, $i );
-
- @seqs = split //, $record->{ 'SEQ' };
- @scores = split /;/, $record->{ 'SCORES' };
-
- for ( $i = 0; $i < @scores; $i++ ) {
- $seqs[ $i ] = lc $seqs[ $i ] if $scores[ $i ] < $quality;
- }
-
- $record->{ 'SEQ' } = join "", @seqs;
- $record->{ 'SCORES' } = join ";", @scores;
-}
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
BEGIN
{
Maasha::Biopieces::status_set();
use warnings;
use strict;
+use Data::Dumper;
use Maasha::Biopieces;
use Maasha::Filesys;
use Maasha::Solexa;
+use Maasha::Fastq;
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$num = 1;
- if ( $options->{ "format" } eq "octal" )
+ while ( $entry = Maasha::Fastq::get_entry( $data_in ) )
{
- while ( $entry = Maasha::Solexa::solexa_get_entry_octal( $data_in ) )
+ if ( $record = Maasha::Fastq::fastq2biopiece( $entry ) )
{
- $record = Maasha::Solexa::solexa2biopiece( $entry, $options->{ "quality" } );
+ $record->{ 'SCORES' } =~ s/(\d+) ?/Maasha::Fastq::score2phred( $1 )/ge if $options->{ 'format' } eq 'decimal';
+ Maasha::Fastq::solexa2phred( $record->{ 'SCORES' } );
+ Maasha::Fastq::lowercase_low_scores( $record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'quality' } );
- Maasha::Biopieces::put_record( $record, $out );
-
- last if $options->{ "num" } and $num == $options->{ "num" };
-
- $num++;
- }
- }
- else
- {
- while ( $entry = Maasha::Solexa::solexa_get_entry_decimal( $data_in ) )
- {
- $record = Maasha::Solexa::solexa2biopiece( $entry, $options->{ "quality" } );
+ $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml
+ $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) );
Maasha::Biopieces::put_record( $record, $out );
-
- last if $options->{ "num" } and $num == $options->{ "num" };
-
- $num++;
}
+
+ last if $options->{ "num" } and $num == $options->{ "num" };
+
+ $num++;
}
close $data_in;
use Maasha::Calc;
use vars qw( @ISA @EXPORT );
-@ISA = qw( Exporter );
-
use constant {
SEQ_NAME => 0,
SEQ => 1,
SCORES => 2,
};
+@ISA = qw( Exporter );
+
+use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } );
+
+int phred2score( char c )
+{
+ /* Martin A. Hansen, July 2009 */
+
+ /* Converts a FASTQ/phred score in octal (a char) to a decimal score, */
+ /* which is returned. */
+
+ /* http://maq.sourceforge.net/fastq.shtml */
+
+ int score = 0;
+
+ score = ( int ) c - 33;
+
+ return score;
+}
+
+
+int solexa2score( char c )
+{
+ /* Martin A. Hansen, July 2009 */
+
+ /* Converts a Solexa score in octal (a char) to a decimal score, */
+ /* which is returned. */
+
+ /* http://maq.sourceforge.net/fastq.shtml */
+ /* $Q = 10 * log(1 + 10 ** (ord($sq) - 64) / 10.0)) / log(10); */
+
+ int score = 0;
+ int ascii = ( int ) c - 64;
+
+ score = 10 * log( 1 + pow( 10, ascii / 10 ) ) / log( 10 );
+
+ return score;
+}
+
+
+char score2phred( int score )
+{
+ /* Martin A. Hansen, July 2009 */
+
+ /* Converts a decimal score in decimal to an octal score (a char), */
+ /* which is returned. */
+
+ /* http://maq.sourceforge.net/fastq.shtml */
+
+ char c;
+
+ if ( score <= 93 ) {
+ c = ( char ) score + 33;
+ } else {
+ c = ( char ) 93 + 33;
+ }
+
+// printf( "score: %d char: %c\n", score, c );
+
+ return c;
+}
+
+
+void lowercase_low_scores( char *seq, char *scores, int threshold )
+{
+ /* Martin A. Hansen, July 2009 */
+
+ /* Given a sequence string and a score string (in FASTQ/Phread range) */
+ /* lowercases all residues where the decimal score is below a given threshold. */
+
+ int i = 0;
+
+ for ( i = 0; i < strlen( seq ); i++ )
+ {
+ if ( phred2score( scores[ i ] ) < threshold ) {
+ seq[ i ] = tolower( seq[ i ] );
+ }
+ }
+}
+
+
+void solexa2phred( char *scores )
+{
+ /* Martin A. Hansen, July 2009 */
+
+ /* Convert Solexa scores to Phred scores for use with FASTQ format.*/
+
+ int i = 0;
+ int score = 0;
+ char c;
+
+ for ( i = 0; i < strlen( scores ); i++ )
+ {
+ score = solexa2score( scores[ i ] );
+ c = score2phred( score );
+
+// printf( "scores[i]: %c score: %d char: %c\n", scores[ i ], score, c );
+
+ scores[ i ] = c;
+ }
+}
+
+
+END_C
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$record->{ 'SEQ_NAME' } = $entry->[ SEQ_NAME ];
$record->{ 'SCORES' } = $entry->[ SCORES ];
- $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml
- $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) );
-
return wantarray ? %{ $record } : $record;
}
# Returns a list.
- my ( $seq_header, $seq, $score_head, $score, @scores );
+ my ( $seq_header, $seq, $score_head, $score );
$seq_header = <$fh>;
$seq = <$fh>;
chomp $score_header;
chomp $score;
- @scores = split / /, $score;
-
$seq_header =~ s/^@//;
-
- $score = join( ";", @scores );
+ $score =~ s/ /;/g;
return wantarray ? ( $seq_header, $seq, $score ) : [ $seq_header, $seq, $score ];
}