From 9e506c77bf020025b9788beac9ece546e0dd93ee Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 16 Oct 2013 08:02:04 +0000 Subject: [PATCH] cleanup of analyze_vals added analyze_scores git-svn-id: http://biopieces.googlecode.com/svn/trunk@2236 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/analyze_scores | 60 +++++++++++++++++++ bp_bin/analyze_vals | 8 +-- bp_test/in/analyze_scores.in | 50 ++++++++++++++++ bp_test/out/analyze_scores.out.1 | 100 +++++++++++++++++++++++++++++++ bp_test/out/analyze_vals.out.1 | 4 +- bp_test/out/analyze_vals.out.2 | 2 +- bp_test/out/analyze_vals.out.3 | 2 +- bp_test/test/test_analyze_scores | 7 +++ 8 files changed, 225 insertions(+), 8 deletions(-) create mode 100755 bp_bin/analyze_scores create mode 100644 bp_test/in/analyze_scores.in create mode 100644 bp_test/out/analyze_scores.out.1 create mode 100755 bp_test/test/test_analyze_scores diff --git a/bp_bin/analyze_scores b/bp_bin/analyze_scores new file mode 100755 index 0000000..c015989 --- /dev/null +++ b/bp_bin/analyze_scores @@ -0,0 +1,60 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2013 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Analyze Illumina type SCORES strings in the stream: every record with a SCORES key gets SCORES_LEN, SCORES_MIN, SCORES_MAX, SCORES_MEAN and SCORES_MEDIAN added, computed from the SCORES bytes after subtracting Seq::SCORE_BASE; records without SCORES are passed through unchanged. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +require 'maasha/biopieces' +require 'maasha/seq' +require 'narray' +require 'pp' + +options = Biopieces.options_parse(ARGV) + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SCORES] + na = NArray.to_na(record[:SCORES], 'byte') + na -= Seq::SCORE_BASE # shift every byte down by the score base offset + + record[:SCORES_LEN] = na.size + record[:SCORES_MIN] = na.min + record[:SCORES_MAX] = na.max + record[:SCORES_MEAN] = na.mean + record[:SCORES_MEDIAN] = na.median + end + + output.puts record + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_bin/analyze_vals b/bp_bin/analyze_vals index aa9d12a..e7e8249 100755 --- a/bp_bin/analyze_vals +++ b/bp_bin/analyze_vals @@ -98,10 +98,10 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| :KEY => key, :TYPE => value[:type].to_s.capitalize, :COUNT => value[:count], - :MIN => value[:min], - :MAX => value[:max], - :SUM => value[:sum], - :MEAN => (value[:sum] / value[:count].to_f).round(1) + :MIN => value[:min].is_a?(Float) ? "%0.2f" % value[:min] : value[:min], + :MAX => value[:max].is_a?(Float) ? "%0.2f" % value[:max] : value[:max], + :SUM => value[:sum].is_a?(Float) ? 
"%0.2f" % value[:sum] : value[:sum], + :MEAN => "%0.2f" % (value[:sum] / value[:count].to_f) } output.puts stat_record diff --git a/bp_test/in/analyze_scores.in b/bp_test/in/analyze_scores.in new file mode 100644 index 0000000..4a56dd4 --- /dev/null +++ b/bp_test/in/analyze_scores.in @@ -0,0 +1,50 @@ +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1 +SEQ: TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT +SEQ_LEN: 50 +SCORES: GGFBGGEADFAFFDDD,-5AC5?!C:)7?##################### +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1041:14486#TTAGGC/1 +SEQ: CATGGCGTATGCCAGACGGCCAGAACGATGGCCGCCGGGCTTCANNNAAG +SEQ_LEN: 50 +SCORES: FFFFDBD?EEEEEEEFGGFAGAGEFDF=BFGFFGGDDDD=ABAA###### +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1043:19446#TTAGGC/1 +SEQ: CGGTACTGATCGAGTGTCAGGCTGTTGATCGCCGCGGGCGGGGGTNNGAC +SEQ_LEN: 50 +SCORES: ECAEBEEEEEFFFFFEFFFFDDEEEGGGGGDEBEECBDAE@######### +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1044:7943#TTAGGC/1 +SEQ: CTGATGCATGAAGATAGTCGGATGCACAATATACACGGCTAACGCNNAGG +SEQ_LEN: 50 +SCORES: GGGGDGGGGGEFE?FEEEEEECEDEFFEDFGFDGFGEGGFDBCDD##ACA +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1045:16499#TTAGGC/1 +SEQ: CTTGGTGCCCGTCACGCGCACTGCGTCGCCCTGAATGCTCGCCTGNNCCT +SEQ_LEN: 50 +SCORES: DFDFFFF=FAADE??:CACADDAAD=BDDDD;5A:5C:CA:=7;:##B=: +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:4566#TTAGGC/1 +SEQ: CACTGAAGGGTTCGTCCATCAGGATGATGCGAGGATTCAGCGCCANNGCT +SEQ_LEN: 50 +SCORES: GGGAGBGGGGDGDFGGGGFGDGBF?GGGGGGGGAEFF=:DDDDC5##B=B +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:18609#TTAGGC/1 +SEQ: ATCTGGCCAAGGTCAGACAGAGCTTCCCGTAACTTCGGCACAGCGNNATC +SEQ_LEN: 50 +SCORES: GGGGG?FGBFFFFFFF?FFFEEDEEFFFFDDFB?BFDFFFC=DDD##@@@ +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:20035#TTAGGC/1 +SEQ: CGTCACGTGGCGTGAATCCAATCTCGGCTATGCGCTGATCGGCGCNNCCA +SEQ_LEN: 50 +SCORES: FDEDFEE?EE:ECDBB?ACD:BAC?DAAADEEEEBAE:E:C!C@C##C-+ +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1048:14107#TTAGGC/1 +SEQ: GAAAGTGCAATCTACCGTTGTCGATATGCTGTCCGGCCTTGGCTACNGTG +SEQ_LEN: 50 +SCORES: 
AAAB!=DDDDEEEDEEEEEDEEEDEEADEDEEEEEEEEEBAEEBEC#DBC +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1049:17891#TTAGGC/1 +SEQ: CATCACCCGCTTCGAACTCCCGCACAAAGGCCGGCGTTGGCGAACGATTT +SEQ_LEN: 50 +SCORES: AFGGGEGFGFBFFFB:FFFEFFFED5BEBB?DFDEFFF:FDE?CEDBDDD +--- diff --git a/bp_test/out/analyze_scores.out.1 b/bp_test/out/analyze_scores.out.1 new file mode 100644 index 0000000..f52edc1 --- /dev/null +++ b/bp_test/out/analyze_scores.out.1 @@ -0,0 +1,100 @@ +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1 +SEQ: TTCGGCATCGGCGGCGACGTTGGCGGCGGGGCCGGGCGGGTCGANNNCAT +SEQ_LEN: 50 +SCORES: GGFBGGEADFAFFDDD,-5AC5?!C:)7?##################### +SCORES_LEN: 50 +SCORES_MIN: 0 +SCORES_MAX: 38 +SCORES_MEAN: 17.86 +SCORES_MEDIAN: 16 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1041:14486#TTAGGC/1 +SEQ: CATGGCGTATGCCAGACGGCCAGAACGATGGCCGCCGGGCTTCANNNAAG +SEQ_LEN: 50 +SCORES: FFFFDBD?EEEEEEEFGGFAGAGEFDF=BFGFFGGDDDD=ABAA###### +SCORES_LEN: 50 +SCORES_MIN: 2 +SCORES_MAX: 38 +SCORES_MEAN: 31.26 +SCORES_MEDIAN: 36 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1043:19446#TTAGGC/1 +SEQ: CGGTACTGATCGAGTGTCAGGCTGTTGATCGCCGCGGGCGGGGGTNNGAC +SEQ_LEN: 50 +SCORES: ECAEBEEEEEFFFFFEFFFFDDEEEGGGGGDEBEECBDAE@######### +SCORES_LEN: 50 +SCORES_MIN: 2 +SCORES_MAX: 38 +SCORES_MEAN: 29.66 +SCORES_MEDIAN: 36 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1044:7943#TTAGGC/1 +SEQ: CTGATGCATGAAGATAGTCGGATGCACAATATACACGGCTAACGCNNAGG +SEQ_LEN: 50 +SCORES: GGGGDGGGGGEFE?FEEEEEECEDEFFEDFGFDGFGEGGFDBCDD##ACA +SCORES_LEN: 50 +SCORES_MIN: 2 +SCORES_MAX: 38 +SCORES_MEAN: 34.76 +SCORES_MEDIAN: 36 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1045:16499#TTAGGC/1 +SEQ: CTTGGTGCCCGTCACGCGCACTGCGTCGCCCTGAATGCTCGCCTGNNCCT +SEQ_LEN: 50 +SCORES: DFDFFFF=FAADE??:CACADDAAD=BDDDD;5A:5C:CA:=7;:##B=: +SCORES_LEN: 50 +SCORES_MIN: 2 +SCORES_MAX: 37 +SCORES_MEAN: 30.12 +SCORES_MEDIAN: 32 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:4566#TTAGGC/1 +SEQ: CACTGAAGGGTTCGTCCATCAGGATGATGCGAGGATTCAGCGCCANNGCT +SEQ_LEN: 50 +SCORES: 
GGGAGBGGGGDGDFGGGGFGDGBF?GGGGGGGGAEFF=:DDDDC5##B=B +SCORES_LEN: 50 +SCORES_MIN: 2 +SCORES_MAX: 38 +SCORES_MEAN: 34.1 +SCORES_MEDIAN: 37 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:18609#TTAGGC/1 +SEQ: ATCTGGCCAAGGTCAGACAGAGCTTCCCGTAACTTCGGCACAGCGNNATC +SEQ_LEN: 50 +SCORES: GGGGG?FGBFFFFFFF?FFFEEDEEFFFFDDFB?BFDFFFC=DDD##@@@ +SCORES_LEN: 50 +SCORES_MIN: 2 +SCORES_MAX: 38 +SCORES_MEAN: 34.1 +SCORES_MEDIAN: 37 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:20035#TTAGGC/1 +SEQ: CGTCACGTGGCGTGAATCCAATCTCGGCTATGCGCTGATCGGCGCNNCCA +SEQ_LEN: 50 +SCORES: FDEDFEE?EE:ECDBB?ACD:BAC?DAAADEEEEBAE:E:C!C@C##C-+ +SCORES_LEN: 50 +SCORES_MIN: 0 +SCORES_MAX: 37 +SCORES_MEAN: 30.5 +SCORES_MEDIAN: 34 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1048:14107#TTAGGC/1 +SEQ: GAAAGTGCAATCTACCGTTGTCGATATGCTGTCCGGCCTTGGCTACNGTG +SEQ_LEN: 50 +SCORES: AAAB!=DDDDEEEDEEEEEDEEEDEEADEDEEEEEEEEEBAEEBEC#DBC +SCORES_LEN: 50 +SCORES_MIN: 0 +SCORES_MAX: 36 +SCORES_MEAN: 33.52 +SCORES_MEDIAN: 36 +--- +SEQ_NAME: ILLUMINA-52179E_0004:2:1:1049:17891#TTAGGC/1 +SEQ: CATCACCCGCTTCGAACTCCCGCACAAAGGCCGGCGTTGGCGAACGATTT +SEQ_LEN: 50 +SCORES: AFGGGEGFGFBFFFB:FFFEFFFED5BEBB?DFDEFFF:FDE?CEDBDDD +SCORES_LEN: 50 +SCORES_MIN: 20 +SCORES_MAX: 38 +SCORES_MEAN: 34.9 +SCORES_MEDIAN: 36 +--- diff --git a/bp_test/out/analyze_vals.out.1 b/bp_test/out/analyze_vals.out.1 index 313969c..6afc4df 100644 --- a/bp_test/out/analyze_vals.out.1 +++ b/bp_test/out/analyze_vals.out.1 @@ -4,7 +4,7 @@ COUNT: 3 MIN: 3 MAX: 5 SUM: 13 -MEAN: 4.3 +MEAN: 4.33 --- KEY: V1 TYPE: Numeric @@ -12,5 +12,5 @@ COUNT: 3 MIN: 6 MAX: 123 SUM: 174 -MEAN: 58.0 +MEAN: 58.00 --- diff --git a/bp_test/out/analyze_vals.out.2 b/bp_test/out/analyze_vals.out.2 index 90c8c4e..e1d6bb5 100644 --- a/bp_test/out/analyze_vals.out.2 +++ b/bp_test/out/analyze_vals.out.2 @@ -4,5 +4,5 @@ COUNT: 3 MIN: 3 MAX: 5 SUM: 13 -MEAN: 4.3 +MEAN: 4.33 --- diff --git a/bp_test/out/analyze_vals.out.3 b/bp_test/out/analyze_vals.out.3 index 345c5dc..6d628d0 100644 --- 
a/bp_test/out/analyze_vals.out.3 +++ b/bp_test/out/analyze_vals.out.3 @@ -4,5 +4,5 @@ COUNT: 3 MIN: 6 MAX: 123 SUM: 174 -MEAN: 58.0 +MEAN: 58.00 --- diff --git a/bp_test/test/test_analyze_scores b/bp_test/test/test_analyze_scores new file mode 100755 index 0000000..61c5709 --- /dev/null +++ b/bp_test/test/test_analyze_scores @@ -0,0 +1,7 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -I $in -O $tmp" +assert_no_diff $tmp $out.1 +clean -- 2.39.2