use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } );
-int phred2score( char c )
+/*
+About Phred or FASTQ format:
+
+This format is an adaption of the fasta format that contains quality scores.
+However, the fastq format is not completely compatible with the fastq files
+currently in existence, which is read by various applications (for example,
+BioPerl). Because a larger dynamic range of quality scores is used, the
+quality scores are encoded in ASCII as 64+score, instead of the standard
+32+score. This method is used to avoid running into non-printable characters.
+
+
+About Solexa or SCARF format:
+
+SCARF (Solexa compact ASCII read format)—This easy-to-parse text based format,
+stores all the information for a single read in one line. Allowed values are
+--numeric and --symbolic. --numeric outputs the quality values as a
+space-separated string of numbers. --symbolic outputs the quality values as a
+compact string of ASCII characters. Subtract 64 from the ASCII code to get the
+corresponding quality values. Under the current numbering scheme, quality
+values can theoretically be as low as -5, which has an ASCII encoding of 59=';'.
+
+See also:
+
+http://maq.sourceforge.net/fastq.shtml */
+
+
+int phred2dec( char c )
{
/* Martin A. Hansen, July 2009 */
- /* Converts a FASTQ/phred score in octal (a char) to a decimal score, */
+ /* Converts a Phred score in octal (a char) to a decimal score, */
/* which is returned. */
- /* http://maq.sourceforge.net/fastq.shtml */
-
int score = 0;
score = ( int ) c - 33;
}
-int solexa2score( char c )
+int solexa2dec( char c )
{
/* Martin A. Hansen, July 2009 */
/* Converts a Solexa score in octal (a char) to a decimal score, */
/* which is returned. */
- /* http://maq.sourceforge.net/fastq.shtml */
- /* $Q = 10 * log(1 + 10 ** (ord($sq) - 64) / 10.0)) / log(10); */
-
int score = 0;
- int ascii = ( int ) c - 64;
- score = 10 * log( 1 + pow( 10, ascii / 10 ) ) / log( 10 );
+ score = ( int ) c - 64;
return score;
}
-char score2phred( int score )
+// int solexa2dec( char c )
+// {
+// /* Martin A. Hansen, July 2009 */
+//
+// /* Converts a Solexa score in octal (a char) to a decimal score, */
+// /* which is returned. */
+//
+// /* http://maq.sourceforge.net/fastq.shtml */
+// /* $Q = 10 * log(1 + 10 ** (ord($sq) - 64) / 10.0)) / log(10); */
+//
+// int score = 0;
+// int ascii = ( int ) c - 64;
+//
+// score = 10 * log( 1 + pow( 10, ascii / 10 ) ) / log( 10 );
+//
+// // printf( "char: %c ascii: %d score: %d\n", c, ascii, score );
+//
+// return score;
+// }
+
+
+char dec2phred( int score )
{
/* Martin A. Hansen, July 2009 */
- /* Converts a decimal score in decimal to an octal score (a char), */
+ /* Converts a decimal score to an octal score (a char) in the Phred range, */
/* which is returned. */
- /* http://maq.sourceforge.net/fastq.shtml */
+ char c = 0;
- char c;
+ c = ( char ) score + 33;
+
+ return c;
+}
- if ( score <= 93 ) {
- c = ( char ) score + 33;
- } else {
- c = ( char ) 93 + 33;
- }
-// printf( "score: %d char: %c\n", score, c );
+char dec2solexa( int score )
+{
+ /* Martin A. Hansen, September 2009 */
+
+ /* Converts a decimal score to an octal score (a char) in the Solexa range, */
+ /* which is returned. */
+
+ char c = 0;
+
+ c = ( char ) score + 64;
return c;
}
-void solexa2phred( char *scores )
+char solexa2phred( char score )
{
- /* Martin A. Hansen, July 2009 */
+ /* Martin A. Hansen, September 2009 */
- /* Convert Solexa scores to Phred scores for use with FASTQ format.*/
+ /* Converts an octal score in the Solexa range to an octal score */
+ /* in the Pred range, which is returned. */
- int i = 0;
- int score = 0;
- char c;
+ int dec = 0;
+ char c = 0;
+
+ dec = solexa2dec( score );
+ c = dec2phred( c );
+
+ return c;
+}
+
+
+char phred2solexa( char score )
+{
+ /* Martin A. Hansen, September 2009 */
+
+ /* Converts an octal score in the Solexa range to an octal score */
+ /* in the Pred range, which is returned. */
+
+ int dec = 0;
+ char c = 0;
+
+ dec = phred2dec( score );
+ c = dec2solexa( c );
+
+ return c;
+}
+
+
+void solexa_str2phred_str( char *scores )
+{
+ /* Martin A. Hansen, September 2009 */
+
+ /* Converts a string of Solexa octal scores to Phred octal scores in-line. */
+
+ int i = 0;
+
+ for ( i + 0; i < strlen( scores ); i++ ) {
+ scores[ i ] = solexa2phred( scores[ i ] );
+ }
+}
- for ( i = 0; i < strlen( scores ); i++ )
- {
- score = solexa2score( scores[ i ] );
- c = score2phred( score );
-// printf( "scores[i]: %c score: %d char: %c\n", scores[ i ], score, c );
+void phred_str2solexa_str( char *scores )
+{
+ /* Martin A. Hansen, September 2009 */
+
+ /* Converts a string of Phred octal scores to Solexa octal scores in-line. */
- scores[ i ] = c;
+ int i = 0;
+
+ for ( i + 0; i < strlen( scores ); i++ ) {
+ scores[ i ] = phred2solexa( scores[ i ] );
+ }
+}
+
+
+void softmask_solexa_str( char *seq, char *scores, int threshold )
+{
+ /* Martin A. Hansen, July 2009 */
+
+ /* Given a sequence string and a score string (in Solexa range) */
+ /* lowercases all residues where the decimal score is below a given threshold. */
+
+ int i = 0;
+
+ for ( i = 0; i < strlen( seq ); i++ )
+ {
+ if ( solexa2dec( scores[ i ] ) < threshold ) {
+ seq[ i ] = tolower( seq[ i ] );
+ }
}
}
-void lowercase_low_scores( char *seq, char *scores, int threshold )
+void softmask_phred_str( char *seq, char *scores, int threshold )
{
/* Martin A. Hansen, July 2009 */
- /* Given a sequence string and a score string (in FASTQ/Phread range) */
+ /* Given a sequence string and a score string (in Phred range) */
/* lowercases all residues where the decimal score is below a given threshold. */
int i = 0;
for ( i = 0; i < strlen( seq ); i++ )
{
- if ( phred2score( scores[ i ] ) < threshold ) {
+ if ( phred2dec( scores[ i ] ) < threshold ) {
seq[ i ] = tolower( seq[ i ] );
}
}
# Returns list.
- my ( $tmp_file, $pid, $fh_in, $fh_out, $cmd, $i, $line, @lines, $xtic_space, @plot_cmd );
+ my ( $tmp_file, $pid, $fh_in, $fh_out, $cmd, $i, $line, @lines, $xtic_space, @plot_cmd, $title );
$tmp_dir ||= $ENV{ 'BP_TMP' };
print $fh_in "set logscale y\n" if $options->{ "logscale_y" };
print $fh_in "set grid\n" if not $options->{ "terminal" } eq "dumb";
print $fh_in "set autoscale\n";
- print $fh_in "unset key\n";
print $fh_in "set xtics border in scale 0 nomirror rotate by 90 offset character 0, 0, 0\n";
- for ( $i = 1; $i < scalar @{ $data->[ 0 ] } + 1; $i++ ) {
- push @plot_cmd, qq("$tmp_file" using $i with lines ls 1);
+ for ( $i = 1; $i < scalar @{ $data->[ 0 ] } + 1; $i++ )
+ {
+ $title = $options->{ 'keys' }->[ $i - 1 ] || $options->{ 'list' };
+ push @plot_cmd, qq("$tmp_file" using $i with lines title "$title");
}
print $fh_in "plot " . join( ", ", @plot_cmd ) . "\n";
print $fh_in "set style histogram title offset character 0, 0, 0\n";
print $fh_in "set style data histograms\n";
print $fh_in "set xtics border in scale 0 nomirror rotate by 90 offset character 0, 0, 0\n";
-
print $fh_in "plot '-' using 2:xticlabels(1)\n";
for ( $i = 0; $i < @{ $data }; $i++ )