removed debug message

[biopieces.git] / bp_scripts / QA_Illumina_report.rb
diff --git a/bp_scripts/QA_Illumina_report.rb b/bp_scripts/QA_Illumina_report.rb

index 2655b10a4856076fa41a88254db65e8e7e811459..cc6269f1d79280aaf06ba76a20a40d6af204bffb 100755 (executable)
--- a/bp_scripts/QA_Illumina_report.rb
+++ b/bp_scripts/QA_Illumina_report.rb
@@ -23,13 +23,28 @@ require 'tmpdir'
  require 'base64'
  require 'erb'
  
+class Numeric # Reopens the core Numeric class so that every number responds to #commify.
+  def commify # Returns the number's to_s text with "," inserted between digit groups, e.g. 1234567 -> "1,234,567".
+    self.to_s.gsub(/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/, '\1,') # 1st alternative: optional sign plus the leading 1-3 digits that precede whole groups of three; 2nd alternative (\G): each following three-digit group that still has a digit after it.
+  end
+end
+
  def parse_analysis(file)
    data = {}
  
    File.open(file, 'r') do |ios|
      ios.each do |line|
-      key, val = line.chomp.split(' ')
-      data[key] = val.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
+      key, val = line.chomp.split(': ')
+        begin Integer(val)
+          val = val.to_i.commify
+        rescue
+          begin Float(val)
+            val = val.to_f.commify
+          rescue
+          end
+        end
+
+      data[key] = val
      end
    end
  
@@ -66,11 +81,11 @@ scores_bin_file                = File.join(tmpdir, 'scores_bin.png')
  STDERR.puts "Analyzing sequences ... "
  
  system(
-  "read_fastq -i #{seq_file} |
+  "read_fastq -e base_33 -i #{seq_file} |
     progress_meter |
     analyze_vals -k SEQ -o #{analyze_vals_file} |
     trim_seq -l 3 -m 25 |
-   grab -e 'SEQ_LEN > 0' |
+   grab -e 'SEQ_LEN > 20' |
     analyze_vals -k SEQ -o #{analyze_vals_trim_file} |
     find_adaptor -l 6 -L 6 -f ACACGACGCTCTTCCGATCT -r AGATCGGAAGAGCACACGTC |
     clip_adaptor |
@@ -78,7 +93,7 @@ system(
     analyze_vals -k SEQ -o #{analyze_vals_trim_noadapt_file} |
     plot_distribution -k SEQ_LEN -T 'Sequence length distribution' -X 'Sequence length' -t png -o #{lendist_file} |
     plot_scores -c -t png -o #{scores_file} |
-   plot_nucleotide_distribution -t png -o #{nucdist_file} |
+   plot_nucleotide_distribution -c -t png -o #{nucdist_file} |
     bin_vals -k SEQ_LEN -b 25 |
     plot_distribution -T '25 bases bin sequence length distribution' -X 'Sequence length' -k SEQ_LEN_BIN -t png -o #{lendist_bin_file} |
     mean_scores |
@@ -112,7 +127,7 @@ template = %{
        </table>
        <p>Sequence trimming was performed by removing from the ends all residues until 3 consecutive</p>
        <p>residues with quality score larger than or equal to 25.</p>
-      <p>All plots are after sequence trimming.</p>
+      <p>All plots are after sequence trimming and adaptor removal.</p>
        <h2>Sequence length distribution</h2>
        <p><img src="<%= png2base64(lendist_file) %>" width="600" /></p>
        <p><img src="<%= png2base64(lendist_bin_file) %>" width="600" /></p>