From 32c672cb89e2046c08a0f65b7df1fe19f2f9bcd2 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 30 May 2011 15:12:22 +0000 Subject: [PATCH] upgraded mean_scores git-svn-id: http://biopieces.googlecode.com/svn/trunk@1450 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/mean_scores | 10 +++---- bp_test/in/mean_scores.in | 35 +++++++++++++++++++++++++ bp_test/out/mean_scores.out.1 | 42 ++++++++++++++++++++++++++++++ bp_test/out/mean_scores.out.2 | 49 +++++++++++++++++++++++++++++++++++ bp_test/out/mean_scores.out.3 | 49 +++++++++++++++++++++++++++++++++++ bp_test/out/mean_scores.out.4 | 49 +++++++++++++++++++++++++++++++++++ bp_test/test/test_mean_scores | 19 ++++++++++++++ code_perl/Maasha/Fastq.pm | 34 +++++++++++++++--------- 8 files changed, 269 insertions(+), 18 deletions(-) create mode 100644 bp_test/in/mean_scores.in create mode 100644 bp_test/out/mean_scores.out.1 create mode 100644 bp_test/out/mean_scores.out.2 create mode 100644 bp_test/out/mean_scores.out.3 create mode 100644 bp_test/out/mean_scores.out.4 create mode 100755 bp_test/test/test_mean_scores diff --git a/bp_bin/mean_scores b/bp_bin/mean_scores index ab98dec..bd47e5b 100755 --- a/bp_bin/mean_scores +++ b/bp_bin/mean_scores @@ -35,7 +35,7 @@ use Maasha::Fastq; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $record ); +my ( $options, $in, $out, $record, $mean, $pos ); $options = Maasha::Biopieces::parse_options( [ @@ -54,11 +54,9 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) { if ( $options->{ "local" } ) { - $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", Maasha::Fastq::solexa_str_mean_window( - $record->{ 'SCORES' }, - $options->{ 'window_size' }, - $options->{ 'min' } - ) ); + ( $mean, $pos ) = Maasha::Fastq::solexa_str_mean_window( $record->{ 'SCORES' }, $options->{ 'window_size' }, $options->{ 'min' } ); + $record->{ 'SCORES_LOCAL_POS' } = $pos; + $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", $mean); } else { diff --git a/bp_test/in/mean_scores.in b/bp_test/in/mean_scores.in new file mode 100644 index 0000000..62229f8 --- /dev/null +++ b/bp_test/in/mean_scores.in @@ -0,0 +1,35 @@ +SEQ_NAME: test1 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +--- +SEQ_NAME: test2 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +--- +SEQ_NAME: test3 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh +--- +SEQ_NAME: test4 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB +--- +SEQ_NAME: test5 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB +--- +SEQ_NAME: test6 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +--- +SEQ_NAME: test7 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +--- diff --git a/bp_test/out/mean_scores.out.1 b/bp_test/out/mean_scores.out.1 new file mode 100644 index 0000000..85013fb --- /dev/null +++ b/bp_test/out/mean_scores.out.1 @@ -0,0 +1,42 @@ +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test1 +SCORES_MEAN: 40.00 +--- +SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test2 +SCORES_MEAN: 35.13 +--- +SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test3 +SCORES_MEAN: 30.26 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test4 +SCORES_MEAN: 35.13 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test5 +SCORES_MEAN: 30.26 +--- +SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test6 +SCORES_MEAN: 35.13 +--- +SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SEQ_LEN: 39 +SEQ_NAME: test7 +SCORES_MEAN: 30.26 +--- diff --git a/bp_test/out/mean_scores.out.2 b/bp_test/out/mean_scores.out.2 new file mode 100644 index 0000000..daace59 --- /dev/null +++ b/bp_test/out/mean_scores.out.2 @@ -0,0 +1,49 @@ +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 40.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: -1 +SEQ_LEN: 39 +SEQ_NAME: test1 +--- +SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 9.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 3 +SEQ_LEN: 39 +SEQ_NAME: test2 +--- +SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 9.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 3 +SEQ_LEN: 39 +SEQ_NAME: test3 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB +SCORES_LOCAL_MEAN: 9.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 33 +SEQ_LEN: 39 +SEQ_NAME: test4 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB +SCORES_LOCAL_MEAN: 9.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 28 +SEQ_LEN: 39 +SEQ_NAME: test5 +--- +SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 0 +SEQ_LEN: 39 +SEQ_NAME: test6 +--- +SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 0 +SEQ_LEN: 39 +SEQ_NAME: test7 +--- diff --git a/bp_test/out/mean_scores.out.3 b/bp_test/out/mean_scores.out.3 new file mode 100644 index 0000000..7ecb05b --- /dev/null +++ b/bp_test/out/mean_scores.out.3 @@ -0,0 +1,49 @@ +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 40.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: -1 +SEQ_LEN: 39 +SEQ_NAME: test1 +--- +SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 4 +SEQ_LEN: 39 +SEQ_NAME: test2 +--- +SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 4 +SEQ_LEN: 39 +SEQ_NAME: test3 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 34 +SEQ_LEN: 39 +SEQ_NAME: test4 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 29 +SEQ_LEN: 39 +SEQ_NAME: test5 +--- +SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 0 +SEQ_LEN: 39 +SEQ_NAME: test6 +--- +SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 0 +SEQ_LEN: 39 +SEQ_NAME: test7 +--- diff --git a/bp_test/out/mean_scores.out.4 b/bp_test/out/mean_scores.out.4 new file mode 100644 index 0000000..9b40030 --- /dev/null +++ b/bp_test/out/mean_scores.out.4 @@ -0,0 +1,49 @@ +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 40.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: -1 +SEQ_LEN: 39 +SEQ_NAME: test1 +--- +SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 21.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: -1 +SEQ_LEN: 39 +SEQ_NAME: test2 +--- +SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 4 +SEQ_LEN: 39 +SEQ_NAME: test3 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB +SCORES_LOCAL_MEAN: 21.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: -1 +SEQ_LEN: 39 +SEQ_NAME: test4 +--- +SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 29 +SEQ_LEN: 39 +SEQ_NAME: test5 +--- +SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 21.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: -1 +SEQ_LEN: 39 +SEQ_NAME: test6 +--- +SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +SCORES_LOCAL_MEAN: 2.00 +SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +SCORES_LOCAL_POS: 0 +SEQ_LEN: 39 +SEQ_NAME: test7 +--- diff --git a/bp_test/test/test_mean_scores b/bp_test/test/test_mean_scores new file mode 100755 index 0000000..6ff413f --- /dev/null +++ b/bp_test/test/test_mean_scores @@ -0,0 +1,19 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -I $in -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -I $in -l -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in -l -m 2 -O $tmp" +assert_no_diff $tmp $out.3 +clean + +run "$bp -I $in -l -m 2 -w 10 -O $tmp" +assert_no_diff $tmp $out.4 +clean diff --git a/code_perl/Maasha/Fastq.pm b/code_perl/Maasha/Fastq.pm index 974c9c9..b48279c 100644 --- a/code_perl/Maasha/Fastq.pm +++ b/code_perl/Maasha/Fastq.pm @@ -243,7 +243,7 @@ double solexa_str_mean( char *scores ) } -double solexa_str_mean_window( char *scores, int window_size, double min ) +void solexa_str_mean_window( char *scores, int window_size, double min ) { /* Martin A. Hansen, June 2010. */ @@ -253,9 +253,11 @@ double solexa_str_mean_window( char *scores, int window_size, double min ) /* is lower than a given minimum otherwise the smallest mean */ /* score is returned. */ - int i = 0; - double sum = 0; - double mean = 0.0; + int found = 0; + int i = 0; + int pos = -1; + double sum = 0; + double mean = 0.0; if ( window_size > strlen( scores ) ) { @@ -271,13 +273,14 @@ double solexa_str_mean_window( char *scores, int window_size, double min ) mean = sum / window_size; - if ( mean < min ) { - return mean; + if ( mean <= min ) { + found = 1; + pos = 0; } /* --- scan the rest of the scores ---- */ - while ( i < strlen( scores ) ) + while ( ! found && i < strlen( scores ) ) { sum += solexa2dec( scores[ i ] ); sum -= solexa2dec( scores[ i - window_size ] ); @@ -286,14 +289,21 @@ double solexa_str_mean_window( char *scores, int window_size, double min ) // printf( "char->%c score->%d sum->%f mean->%f\n", scores[i], solexa2dec(scores[i]),sum, mean); - if ( mean < min ) { - return mean; - } - i++; + + if ( mean <= min ) { + found = 1; + pos = i - window_size; + } } - return mean; + Inline_Stack_Vars; + Inline_Stack_Reset; + + Inline_Stack_Push( sv_2mortal( newSViv( mean ) ) ); + Inline_Stack_Push( sv_2mortal( newSViv( pos ) ) ); + + Inline_Stack_Done; } -- 2.39.5