]> git.donarmstrong.com Git - biopieces.git/commitdiff
upgraded mean_scores
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 30 May 2011 15:12:22 +0000 (15:12 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 30 May 2011 15:12:22 +0000 (15:12 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1450 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/mean_scores
bp_test/in/mean_scores.in [new file with mode: 0644]
bp_test/out/mean_scores.out.1 [new file with mode: 0644]
bp_test/out/mean_scores.out.2 [new file with mode: 0644]
bp_test/out/mean_scores.out.3 [new file with mode: 0644]
bp_test/out/mean_scores.out.4 [new file with mode: 0644]
bp_test/test/test_mean_scores [new file with mode: 0755]
code_perl/Maasha/Fastq.pm

index ab98dec3881da6417c14f519facb48dc4820c3af..bd47e5b8223faa76793c026f6982a50a048c16b6 100755 (executable)
@@ -35,7 +35,7 @@ use Maasha::Fastq;
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 
-my ( $options, $in, $out, $record );
+my ( $options, $in, $out, $record, $mean, $pos );
 
 $options = Maasha::Biopieces::parse_options(
     [
@@ -54,11 +54,9 @@ while ( $record = Maasha::Biopieces::get_record( $in ) )
     {
         if ( $options->{ "local" } )
         {
-            $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", Maasha::Fastq::solexa_str_mean_window(
-                                                                    $record->{ 'SCORES' }, 
-                                                                    $options->{ 'window_size' }, 
-                                                                    $options->{ 'min' }
-                                                                ) );
+            ( $mean, $pos ) = Maasha::Fastq::solexa_str_mean_window( $record->{ 'SCORES' }, $options->{ 'window_size' }, $options->{ 'min' } );
+            $record->{ 'SCORES_LOCAL_POS' }  = $pos;
+            $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", $mean);
         }
         else
         {
diff --git a/bp_test/in/mean_scores.in b/bp_test/in/mean_scores.in
new file mode 100644 (file)
index 0000000..62229f8
--- /dev/null
@@ -0,0 +1,35 @@
+SEQ_NAME: test1
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test2
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test3
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test4
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+---
+SEQ_NAME: test5
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+---
+SEQ_NAME: test6
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
+SEQ_NAME: test7
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+---
diff --git a/bp_test/out/mean_scores.out.1 b/bp_test/out/mean_scores.out.1
new file mode 100644 (file)
index 0000000..85013fb
--- /dev/null
@@ -0,0 +1,42 @@
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test1
+SCORES_MEAN: 40.00
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test2
+SCORES_MEAN: 35.13
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test3
+SCORES_MEAN: 30.26
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test4
+SCORES_MEAN: 35.13
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test5
+SCORES_MEAN: 30.26
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test6
+SCORES_MEAN: 35.13
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SEQ_LEN: 39
+SEQ_NAME: test7
+SCORES_MEAN: 30.26
+---
diff --git a/bp_test/out/mean_scores.out.2 b/bp_test/out/mean_scores.out.2
new file mode 100644 (file)
index 0000000..daace59
--- /dev/null
@@ -0,0 +1,49 @@
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 40.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test1
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 3
+SEQ_LEN: 39
+SEQ_NAME: test2
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 3
+SEQ_LEN: 39
+SEQ_NAME: test3
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 33
+SEQ_LEN: 39
+SEQ_NAME: test4
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SCORES_LOCAL_MEAN: 9.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 28
+SEQ_LEN: 39
+SEQ_NAME: test5
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test6
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test7
+---
diff --git a/bp_test/out/mean_scores.out.3 b/bp_test/out/mean_scores.out.3
new file mode 100644 (file)
index 0000000..7ecb05b
--- /dev/null
@@ -0,0 +1,49 @@
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 40.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test1
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 4
+SEQ_LEN: 39
+SEQ_NAME: test2
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 4
+SEQ_LEN: 39
+SEQ_NAME: test3
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 34
+SEQ_LEN: 39
+SEQ_NAME: test4
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 29
+SEQ_LEN: 39
+SEQ_NAME: test5
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test6
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test7
+---
diff --git a/bp_test/out/mean_scores.out.4 b/bp_test/out/mean_scores.out.4
new file mode 100644 (file)
index 0000000..9b40030
--- /dev/null
@@ -0,0 +1,49 @@
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 40.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test1
+---
+SCORES: hhhhBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 21.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test2
+---
+SCORES: hhhhBBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 4
+SEQ_LEN: 39
+SEQ_NAME: test3
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBB
+SCORES_LOCAL_MEAN: 21.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test4
+---
+SCORES: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhBBBBBBBBBB
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 29
+SEQ_LEN: 39
+SEQ_NAME: test5
+---
+SCORES: BBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 21.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: -1
+SEQ_LEN: 39
+SEQ_NAME: test6
+---
+SCORES: BBBBBBBBBBhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+SCORES_LOCAL_MEAN: 2.00
+SEQ: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+SCORES_LOCAL_POS: 0
+SEQ_LEN: 39
+SEQ_NAME: test7
+---
diff --git a/bp_test/test/test_mean_scores b/bp_test/test/test_mean_scores
new file mode 100755 (executable)
index 0000000..6ff413f
--- /dev/null
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
+run "$bp -I $in -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
+run "$bp -I $in -l -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in -l -m 2 -O $tmp"
+assert_no_diff $tmp $out.3
+clean
+
+run "$bp -I $in -l -m 2 -w 10 -O $tmp"
+assert_no_diff $tmp $out.4
+clean
index 974c9c99abfc5c0c4e5dc9d08a259d5bcb0c0291..b48279c24781b3722cbae9976c776eeb25639eec 100644 (file)
@@ -243,7 +243,7 @@ double solexa_str_mean( char *scores )
 }
 
 
-double solexa_str_mean_window( char *scores, int window_size, double min )
+void solexa_str_mean_window( char *scores, int window_size, double min )
 {
     /* Martin A. Hansen, June 2010. */
 
@@ -253,9 +253,11 @@ double solexa_str_mean_window( char *scores, int window_size, double min )
     /* is lower than a given minimum otherwise the smallest mean */
     /* score is returned. */
 
-    int    i    = 0;
-    double sum  = 0;
-    double mean = 0.0;
+    int    found = 0;
+    int    i     = 0;
+    int    pos   = -1;
+    double sum   = 0;
+    double mean  = 0.0;
 
     if ( window_size > strlen( scores ) )
     {
@@ -271,13 +273,14 @@ double solexa_str_mean_window( char *scores, int window_size, double min )
 
     mean = sum / window_size;
 
-    if ( mean < min ) {
-        return mean;
+    if ( mean <= min ) {
+        found = 1;
+        pos   = 0;
     }
 
     /* --- scan the rest of the scores ---- */
 
-    while ( i < strlen( scores ) )
+    while ( ! found && i < strlen( scores ) )
     {
         sum += solexa2dec( scores[ i ] );
         sum -= solexa2dec( scores[ i - window_size ] );
@@ -286,14 +289,21 @@ double solexa_str_mean_window( char *scores, int window_size, double min )
 
         // printf( "char->%c   score->%d   sum->%f   mean->%f\n", scores[i], solexa2dec(scores[i]),sum, mean);
 
-        if ( mean < min ) {
-            return mean;
-        }
-
         i++;
+
+        if ( mean <= min ) {
+            found = 1;
+            pos   = i - window_size;
+        }
     }
 
-    return mean;
+    Inline_Stack_Vars;
+    Inline_Stack_Reset;
+
+    Inline_Stack_Push( sv_2mortal( newSViv( mean ) ) );
+    Inline_Stack_Push( sv_2mortal( newSViv( pos ) ) );
+
+    Inline_Stack_Done;
 }