3 # Copyright (C) 2006-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for manipulation of FASTQ files and FASTQ entries.
27 # http://maq.sourceforge.net/fastq.shtml
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
37 use vars qw( @ISA @EXPORT );
45 @ISA = qw( Exporter );
47 use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } );
49 int phred2score( char c )
51 /* Martin A. Hansen, July 2009 */
53 /* Converts a FASTQ/phred score in octal (a char) to a decimal score, */
54 /* which is returned. */
56 /* http://maq.sourceforge.net/fastq.shtml */
60 score = ( int ) c - 33;
66 int solexa2score( char c )
68 /* Martin A. Hansen, July 2009 */
70 /* Converts a Solexa score in octal (a char) to a decimal score, */
71 /* which is returned. */
73 /* http://maq.sourceforge.net/fastq.shtml */
74 /* $Q = 10 * log(1 + 10 ** (ord($sq) - 64) / 10.0)) / log(10); */
77 int ascii = ( int ) c - 64;
79 score = 10 * log( 1 + pow( 10, ascii / 10 ) ) / log( 10 );
85 char score2phred( int score )
87 /* Martin A. Hansen, July 2009 */
89 /* Converts a decimal score in decimal to an octal score (a char), */
90 /* which is returned. */
92 /* http://maq.sourceforge.net/fastq.shtml */
97 c = ( char ) score + 33;
102 // printf( "score: %d char: %c\n", score, c );
108 void lowercase_low_scores( char *seq, char *scores, int threshold )
110 /* Martin A. Hansen, July 2009 */
112 /* Given a sequence string and a score string (in FASTQ/Phread range) */
113 /* lowercases all residues where the decimal score is below a given threshold. */
117 for ( i = 0; i < strlen( seq ); i++ )
119 if ( phred2score( scores[ i ] ) < threshold ) {
120 seq[ i ] = tolower( seq[ i ] );
126 void solexa2phred( char *scores )
128 /* Martin A. Hansen, July 2009 */
130 /* Convert Solexa scores to Phred scores for use with FASTQ format.*/
136 for ( i = 0; i < strlen( scores ); i++ )
138 score = solexa2score( scores[ i ] );
139 c = score2phred( score );
141 // printf( "scores[i]: %c score: %d char: %c\n", scores[ i ], score, c );
151 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
156 # Martin A. Hansen, July 2009.
158 # Gets the next FASTQ entry from a given filehandle.
160 my ( $fh, # filehandle
165 my ( $seq, $seq_name, $qual, $qual_name );
181 return wantarray ? ( $seq_name, $seq, $qual ) : [ $seq_name, $seq, $qual ];
187 # Martin A. Hansen, July 2009.
189 # Output a FASTQ entry to STDOUT or a filehandle.
191 my ( $entry, # FASTQ entry
192 $fh, # filehandle - OPTIONAL
199 print $fh "@" . $entry->[ SEQ_NAME ] . "\n";
200 print $fh $entry->[ SEQ ] . "\n";
202 print $fh $entry->[ SCORES ] . "\n";
208 # Martin A. Hansen, July 2009.
210 # Converts a FASTQ entry to a Biopiece record, where
211 # the FASTQ quality scores are converted to numerics.
213 my ( $entry, # FASTQ entry,
220 $record->{ 'SEQ' } = $entry->[ SEQ ];
221 $record->{ 'SEQ_NAME' } = $entry->[ SEQ_NAME ];
222 $record->{ 'SCORES' } = $entry->[ SCORES ];
224 return wantarray ? %{ $record } : $record;
230 # Martin A. Hansen, July 2009.
232 # Converts a Biopiece record to a FASTQ entry.
234 my ( $record, # Biopiece record
241 if ( exists $record->{ 'SEQ' } and exists $record->{ 'SEQ_NAME' } and exists $record->{ 'SCORES' } )
243 $list->[ SEQ_NAME ] = $record->{ 'SEQ_NAME' };
244 $list->[ SEQ ] = $record->{ 'SEQ' };
245 $list->[ SCORES ] = $record->{ 'SCORES' };
247 $list->[ SCORES ] =~ s/(\d+);/chr( ( $1 <= 93 ? $1 : 93 ) + 33 )/ge;
249 return wantarray ? @{ $list } : $list;
256 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<