git.donarmstrong.com Git - biopieces.git/commitdiff
cleanup of analyze_vals added analyze_scores
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 16 Oct 2013 08:02:04 +0000 (08:02 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 16 Oct 2013 08:02:04 +0000 (08:02 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2236 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/analyze_scores [new file with mode: 0755]
bp_bin/analyze_vals
bp_test/in/analyze_scores.in [new file with mode: 0644]
bp_test/out/analyze_scores.out.1 [new file with mode: 0644]
bp_test/out/analyze_vals.out.1
bp_test/out/analyze_vals.out.2
bp_test/out/analyze_vals.out.3
bp_test/test/test_analyze_scores [new file with mode: 0755]

diff --git a/bp_bin/analyze_scores b/bp_bin/analyze_scores
new file mode 100755 (executable)
index 0000000..c015989
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2013 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Analyze Illumina type SCORES strings in the stream.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'maasha/biopieces'
+require 'maasha/seq'
+require 'narray'
+require 'pp'
+
+options = Biopieces.options_parse(ARGV)  # command line arguments -> options hash
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|  # stream records from input to output
+  input.each_record do |record|
+    if record[:SCORES]  # records without a SCORES key are passed through untouched
+      na = NArray.to_na(record[:SCORES], 'byte')  # SCORES string as a byte array, one element per character
+      na -= Seq::SCORE_BASE  # shift by the score base (constant defined outside this file, presumably in maasha/seq)
+
+      record[:SCORES_LEN]    = na.size    # number of score characters
+      record[:SCORES_MIN]    = na.min     # lowest shifted score
+      record[:SCORES_MAX]    = na.max     # highest shifted score
+      record[:SCORES_MEAN]   = na.mean    # expected output in this commit shows a non-integer value here (e.g. 17.86)
+      record[:SCORES_MEDIAN] = na.median  # expected output in this commit shows an integer value here (e.g. 16)
+    end
+
+    output.puts record  # emit every record, annotated or not
+  end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
diff --git a/bp_bin/analyze_vals b/bp_bin/analyze_vals
index aa9d12ae536b280e196598fd54a03cbc03caad2b..e7e8249a8faf73c53bce080d333e56a73a2b7aa1 100755 (executable)
@@ -98,10 +98,10 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
       :KEY   => key,
       :TYPE  => value[:type].to_s.capitalize,
       :COUNT => value[:count],
-      :MIN   => value[:min],
-      :MAX   => value[:max],
-      :SUM   => value[:sum],
-      :MEAN  => (value[:sum] / value[:count].to_f).round(1)
+      :MIN   => value[:min].is_a?(Float) ? "%0.2f" % value[:min] : value[:min],
+      :MAX   => value[:max].is_a?(Float) ? "%0.2f" % value[:max] : value[:max],
+      :SUM   => value[:sum].is_a?(Float) ? "%0.2f" % value[:sum] : value[:sum],
+      :MEAN  => "%0.2f" % (value[:sum] / value[:count].to_f)
     }
 
     output.puts stat_record
diff --git a/bp_test/in/analyze_scores.in b/bp_test/in/analyze_scores.in
new file mode 100644 (file)
index 0000000..4a56dd4
--- /dev/null
@@ -0,0 +1,50 @@
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1
+SEQ: TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT
+SEQ_LEN: 50
+SCORES: GGFBGGEADFAFFDDD,-5AC5?!C:)7?#####################
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1041:14486#TTAGGC/1
+SEQ: CATGGCGTATGCCAGACGGCCAGAACGATGGCCGCCGGGCTTCANNNAAG
+SEQ_LEN: 50
+SCORES: FFFFDBD?EEEEEEEFGGFAGAGEFDF=BFGFFGGDDDD=ABAA######
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1043:19446#TTAGGC/1
+SEQ: CGGTACTGATCGAGTGTCAGGCTGTTGATCGCCGCGGGCGGGGGTNNGAC
+SEQ_LEN: 50
+SCORES: ECAEBEEEEEFFFFFEFFFFDDEEEGGGGGDEBEECBDAE@#########
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1044:7943#TTAGGC/1
+SEQ: CTGATGCATGAAGATAGTCGGATGCACAATATACACGGCTAACGCNNAGG
+SEQ_LEN: 50
+SCORES: GGGGDGGGGGEFE?FEEEEEECEDEFFEDFGFDGFGEGGFDBCDD##ACA
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1045:16499#TTAGGC/1
+SEQ: CTTGGTGCCCGTCACGCGCACTGCGTCGCCCTGAATGCTCGCCTGNNCCT
+SEQ_LEN: 50
+SCORES: DFDFFFF=FAADE??:CACADDAAD=BDDDD;5A:5C:CA:=7;:##B=:
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:4566#TTAGGC/1
+SEQ: CACTGAAGGGTTCGTCCATCAGGATGATGCGAGGATTCAGCGCCANNGCT
+SEQ_LEN: 50
+SCORES: GGGAGBGGGGDGDFGGGGFGDGBF?GGGGGGGGAEFF=:DDDDC5##B=B
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:18609#TTAGGC/1
+SEQ: ATCTGGCCAAGGTCAGACAGAGCTTCCCGTAACTTCGGCACAGCGNNATC
+SEQ_LEN: 50
+SCORES: GGGGG?FGBFFFFFFF?FFFEEDEEFFFFDDFB?BFDFFFC=DDD##@@@
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:20035#TTAGGC/1
+SEQ: CGTCACGTGGCGTGAATCCAATCTCGGCTATGCGCTGATCGGCGCNNCCA
+SEQ_LEN: 50
+SCORES: FDEDFEE?EE:ECDBB?ACD:BAC?DAAADEEEEBAE:E:C!C@C##C-+
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1048:14107#TTAGGC/1
+SEQ: GAAAGTGCAATCTACCGTTGTCGATATGCTGTCCGGCCTTGGCTACNGTG
+SEQ_LEN: 50
+SCORES: AAAB!=DDDDEEEDEEEEEDEEEDEEADEDEEEEEEEEEBAEEBEC#DBC
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1049:17891#TTAGGC/1
+SEQ: CATCACCCGCTTCGAACTCCCGCACAAAGGCCGGCGTTGGCGAACGATTT
+SEQ_LEN: 50
+SCORES: AFGGGEGFGFBFFFB:FFFEFFFED5BEBB?DFDEFFF:FDE?CEDBDDD
+---
diff --git a/bp_test/out/analyze_scores.out.1 b/bp_test/out/analyze_scores.out.1
new file mode 100644 (file)
index 0000000..f52edc1
--- /dev/null
@@ -0,0 +1,100 @@
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1
+SEQ: TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT
+SEQ_LEN: 50
+SCORES: GGFBGGEADFAFFDDD,-5AC5?!C:)7?#####################
+SCORES_LEN: 50
+SCORES_MIN: 0
+SCORES_MAX: 38
+SCORES_MEAN: 17.86
+SCORES_MEDIAN: 16
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1041:14486#TTAGGC/1
+SEQ: CATGGCGTATGCCAGACGGCCAGAACGATGGCCGCCGGGCTTCANNNAAG
+SEQ_LEN: 50
+SCORES: FFFFDBD?EEEEEEEFGGFAGAGEFDF=BFGFFGGDDDD=ABAA######
+SCORES_LEN: 50
+SCORES_MIN: 2
+SCORES_MAX: 38
+SCORES_MEAN: 31.26
+SCORES_MEDIAN: 36
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1043:19446#TTAGGC/1
+SEQ: CGGTACTGATCGAGTGTCAGGCTGTTGATCGCCGCGGGCGGGGGTNNGAC
+SEQ_LEN: 50
+SCORES: ECAEBEEEEEFFFFFEFFFFDDEEEGGGGGDEBEECBDAE@#########
+SCORES_LEN: 50
+SCORES_MIN: 2
+SCORES_MAX: 38
+SCORES_MEAN: 29.66
+SCORES_MEDIAN: 36
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1044:7943#TTAGGC/1
+SEQ: CTGATGCATGAAGATAGTCGGATGCACAATATACACGGCTAACGCNNAGG
+SEQ_LEN: 50
+SCORES: GGGGDGGGGGEFE?FEEEEEECEDEFFEDFGFDGFGEGGFDBCDD##ACA
+SCORES_LEN: 50
+SCORES_MIN: 2
+SCORES_MAX: 38
+SCORES_MEAN: 34.76
+SCORES_MEDIAN: 36
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1045:16499#TTAGGC/1
+SEQ: CTTGGTGCCCGTCACGCGCACTGCGTCGCCCTGAATGCTCGCCTGNNCCT
+SEQ_LEN: 50
+SCORES: DFDFFFF=FAADE??:CACADDAAD=BDDDD;5A:5C:CA:=7;:##B=:
+SCORES_LEN: 50
+SCORES_MIN: 2
+SCORES_MAX: 37
+SCORES_MEAN: 30.12
+SCORES_MEDIAN: 32
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:4566#TTAGGC/1
+SEQ: CACTGAAGGGTTCGTCCATCAGGATGATGCGAGGATTCAGCGCCANNGCT
+SEQ_LEN: 50
+SCORES: GGGAGBGGGGDGDFGGGGFGDGBF?GGGGGGGGAEFF=:DDDDC5##B=B
+SCORES_LEN: 50
+SCORES_MIN: 2
+SCORES_MAX: 38
+SCORES_MEAN: 34.1
+SCORES_MEDIAN: 37
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:18609#TTAGGC/1
+SEQ: ATCTGGCCAAGGTCAGACAGAGCTTCCCGTAACTTCGGCACAGCGNNATC
+SEQ_LEN: 50
+SCORES: GGGGG?FGBFFFFFFF?FFFEEDEEFFFFDDFB?BFDFFFC=DDD##@@@
+SCORES_LEN: 50
+SCORES_MIN: 2
+SCORES_MAX: 38
+SCORES_MEAN: 34.1
+SCORES_MEDIAN: 37
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:20035#TTAGGC/1
+SEQ: CGTCACGTGGCGTGAATCCAATCTCGGCTATGCGCTGATCGGCGCNNCCA
+SEQ_LEN: 50
+SCORES: FDEDFEE?EE:ECDBB?ACD:BAC?DAAADEEEEBAE:E:C!C@C##C-+
+SCORES_LEN: 50
+SCORES_MIN: 0
+SCORES_MAX: 37
+SCORES_MEAN: 30.5
+SCORES_MEDIAN: 34
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1048:14107#TTAGGC/1
+SEQ: GAAAGTGCAATCTACCGTTGTCGATATGCTGTCCGGCCTTGGCTACNGTG
+SEQ_LEN: 50
+SCORES: AAAB!=DDDDEEEDEEEEEDEEEDEEADEDEEEEEEEEEBAEEBEC#DBC
+SCORES_LEN: 50
+SCORES_MIN: 0
+SCORES_MAX: 36
+SCORES_MEAN: 33.52
+SCORES_MEDIAN: 36
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1049:17891#TTAGGC/1
+SEQ: CATCACCCGCTTCGAACTCCCGCACAAAGGCCGGCGTTGGCGAACGATTT
+SEQ_LEN: 50
+SCORES: AFGGGEGFGFBFFFB:FFFEFFFED5BEBB?DFDEFFF:FDE?CEDBDDD
+SCORES_LEN: 50
+SCORES_MIN: 20
+SCORES_MAX: 38
+SCORES_MEAN: 34.9
+SCORES_MEDIAN: 36
+---
diff --git a/bp_test/out/analyze_vals.out.1 b/bp_test/out/analyze_vals.out.1
index 313969c4ce35375b8539a89ae61e68347a334b5a..6afc4dffce518044a0b0d983a71ea66332117074 100644 (file)
@@ -4,7 +4,7 @@ COUNT: 3
 MIN: 3
 MAX: 5
 SUM: 13
-MEAN: 4.3
+MEAN: 4.33
 ---
 KEY: V1
 TYPE: Numeric
@@ -12,5 +12,5 @@ COUNT: 3
 MIN: 6
 MAX: 123
 SUM: 174
-MEAN: 58.0
+MEAN: 58.00
 ---
diff --git a/bp_test/out/analyze_vals.out.2 b/bp_test/out/analyze_vals.out.2
index 90c8c4e7728e4e2af10020122ecea5322127eeb1..e1d6bb53d440000fae31937937ad1552f15b5c92 100644 (file)
@@ -4,5 +4,5 @@ COUNT: 3
 MIN: 3
 MAX: 5
 SUM: 13
-MEAN: 4.3
+MEAN: 4.33
 ---
diff --git a/bp_test/out/analyze_vals.out.3 b/bp_test/out/analyze_vals.out.3
index 345c5dc1b9f34e41fbb3b5a874af924daabf7a31..6d628d00993493ce395ea71aab595e9f99d32f7e 100644 (file)
@@ -4,5 +4,5 @@ COUNT: 3
 MIN: 6
 MAX: 123
 SUM: 174
-MEAN: 58.0
+MEAN: 58.00
 ---
diff --git a/bp_test/test/test_analyze_scores b/bp_test/test/test_analyze_scores
new file mode 100755 (executable)
index 0000000..61c5709
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"  # shared test helpers; presumably defines run, assert_no_diff, clean and $bp/$in/$tmp/$out (not shown here)
+
+run "$bp -I $in -O $tmp"  # run the Biopiece under test: read $in, write $tmp
+assert_no_diff $tmp $out.1  # compare the produced output with the expected .out.1 file
+clean  # presumably removes temporary files -- confirm in test.sh