# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-my ( $options, $in, $out, $record );
+my ( $options, $in, $out, $record, $mean, $pos );
$options = Maasha::Biopieces::parse_options(
[
{
if ( $options->{ "local" } )
{
- $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", Maasha::Fastq::solexa_str_mean_window(
- $record->{ 'SCORES' },
- $options->{ 'window_size' },
- $options->{ 'min' }
- ) );
+ ( $mean, $pos ) = Maasha::Fastq::solexa_str_mean_window( $record->{ 'SCORES' }, $options->{ 'window_size' }, $options->{ 'min' } );
+ $record->{ 'SCORES_LOCAL_POS' } = $pos;
+ $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", $mean);
}
else
{
--- /dev/null
+SEQ_NAME: test1
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test2
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test3
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test4
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+---
+SEQ_NAME: test5
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+---
+SEQ_NAME: test6
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test7
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
--- /dev/null
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test1
+SCORES_MEAN: 40.00
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test2
+SCORES_MEAN: 35.13
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test3
+SCORES_MEAN: 30.26
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test4
+SCORES_MEAN: 35.13
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test5
+SCORES_MEAN: 30.26
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test6
+SCORES_MEAN: 35.13
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test7
+SCORES_MEAN: 30.26
+---
--- /dev/null
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 40.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test1
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 3
+SEQ_LEN: 39
+SEQ_NAME: test2
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 3
+SEQ_LEN: 39
+SEQ_NAME: test3
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 33
+SEQ_LEN: 39
+SEQ_NAME: test4
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 28
+SEQ_LEN: 39
+SEQ_NAME: test5
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test6
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test7
+---
--- /dev/null
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 40.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test1
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 4
+SEQ_LEN: 39
+SEQ_NAME: test2
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 4
+SEQ_LEN: 39
+SEQ_NAME: test3
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 34
+SEQ_LEN: 39
+SEQ_NAME: test4
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 29
+SEQ_LEN: 39
+SEQ_NAME: test5
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test6
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test7
+---
--- /dev/null
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 40.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test1
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 21.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test2
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 4
+SEQ_LEN: 39
+SEQ_NAME: test3
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SCORES_LOCAL_MEAN: 21.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test4
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 29
+SEQ_LEN: 39
+SEQ_NAME: test5
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 21.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test6
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test7
+---
--- /dev/null
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
+run "$bp -I $in -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
+run "$bp -I $in -l -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in -l -m 2 -O $tmp"
+assert_no_diff $tmp $out.3
+clean
+
+run "$bp -I $in -l -m 2 -w 10 -O $tmp"
+assert_no_diff $tmp $out.4
+clean
}
-double solexa_str_mean_window( char *scores, int window_size, double min )
+void solexa_str_mean_window( char *scores, int window_size, double min )
{
/* Martin A. Hansen, June 2010. */
/* is lower than a given minimum otherwise the smallest mean */
/* score is returned. */
- int i = 0;
- double sum = 0;
- double mean = 0.0;
+ int found = 0;
+ int i = 0;
+ int pos = -1;
+ double sum = 0;
+ double mean = 0.0;
if ( window_size > strlen( scores ) )
{
mean = sum / window_size;
- if ( mean < min ) {
- return mean;
+ if ( mean <= min ) {
+ found = 1;
+ pos = 0;
}
/* --- scan the rest of the scores ---- */
- while ( i < strlen( scores ) )
+ while ( ! found && i < strlen( scores ) )
{
sum += solexa2dec( scores[ i ] );
sum -= solexa2dec( scores[ i - window_size ] );
// printf( "char->%c score->%d sum->%f mean->%f\n", scores[i], solexa2dec(scores[i]),sum, mean);
- if ( mean < min ) {
- return mean;
- }
-
i++;
+
+ if ( mean <= min ) {
+ found = 1;
+ pos = i - window_size;
+ }
}
- return mean;
+ Inline_Stack_Vars;
+ Inline_Stack_Reset;
+
+ Inline_Stack_Push( sv_2mortal( newSViv( mean ) ) );
+ Inline_Stack_Push( sv_2mortal( newSViv( pos ) ) );
+
+ Inline_Stack_Done;
}