fixed QA_454_report.rb to work with new analyze_vals

[biopieces.git] / bp_scripts / QA_454_report.rb
diff --git a/bp_scripts/QA_454_report.rb b/bp_scripts/QA_454_report.rb

index 1201389ea785e4fa881f189f1c6af2461c20504c..12397892fd0dd179cf19c3ef297fb04b20a05a19 100755 (executable)
--- a/bp_scripts/QA_454_report.rb
+++ b/bp_scripts/QA_454_report.rb
@@ -31,7 +31,6 @@ class Report
      @seq_analysis   = SeqAnalyze.new(@sff_file, tmpdir)
      @plots          = PlotData.new(@sff_file, tmpdir)
      @table_mid_join = MidTable.new(@sff_file, tmpdir)
-    @table_freq     = FreqTable.new(@sff_file, tmpdir)
    end
  
    # Support templating of member data.
@@ -46,8 +45,7 @@ class Report
  
      def initialize(sff_file, tmpdir)
        @sff_file  = sff_file
-      @out1_file = File.join(tmpdir, "out1.txt")
-      @out2_file = File.join(tmpdir, "out2.txt")
+      @anal_file = File.join(tmpdir, "out1.txt")
        @count     = 0
        @min       = 0
        @max       = 0
@@ -59,7 +57,6 @@ class Report
  
        bp_seq_analyze
        parse_analyze_vals
-      parse_mean_vals
      end
  
      private
@@ -69,38 +66,24 @@ class Report
        system(
          "read_sff -i #{@sff_file} |
           progress_meter |
-         analyze_vals -k SEQ -o #{@out1_file} |
           analyze_seq |
-         mean_vals -k 'GC%,HARD_MASK%,SOFT_MASK%' -o #{@out2_file} -x"
+         analyze_vals -k GC%,HARD_MASK%,SOFT_MASK% |
+         write_tab -o #{@anal_file} -x"
         )
        STDERR.puts "done.\n"
      end
  
      def parse_analyze_vals
-      File.open(@out1_file, "r") do |ios|
+      File.open(@anal_file, "r") do |ios|
          while not ios.eof?
-          line = ios.readline.chomp
-
-          case line
-          when /COUNT\s+(\d+)/; then @count = $1
-          when /MIN\s+(\d+)/;   then @min   = $1
-          when /MAX\s+(\d+)/;   then @max   = $1
-          when /MEAN\s+(\d+)/;  then @mean  = $1
-          when /SUM\s+(\d+)/;   then @bases = $1
-          end
-        end
-      end
-    end
-
-    def parse_mean_vals
-      File.open(@out2_file, "r") do |ios|
-        while not ios.eof?
-          line = ios.readline.chomp
-
-          case line
-          when /GC%_MEAN: (.+)/;        then @gc   = $1
-          when /HARD_MASK%_MEAN: (.+)/; then @hard = $1
-          when /SOFT_MASK%_MEAN: (.+)/; then @soft = $1
+          line   = ios.readline.chomp
+          fields = line.split("\t")
+
+          case fields.first
+          when "SEQ"        then @count, @min, @max, @sum, @mean = fields[2], fields[3], fields[4], fields[5], fields[6]
+          when "GC%"        then @gc   = fields[6]
+          when "HARD_MASK%" then @hard = fields[6]
+          when "SOFT_MASK%" then @soft = fields[6]
            end
          end
        end
@@ -108,7 +91,7 @@ class Report
    end
  
    class PlotData
-    attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores
+    attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores, :nucleotide_dist500, :nucleotide_dist50
  
      def initialize(sff_file, tmpdir)
        @sff_file = sff_file
@@ -117,29 +100,37 @@ class Report
        @plot3    = File.join(tmpdir, "plot3.png")
        @plot4    = File.join(tmpdir, "plot4.png")
        @plot5    = File.join(tmpdir, "plot5.png")
+      @plot6    = File.join(tmpdir, "plot6.png")
+      @plot7    = File.join(tmpdir, "plot7.png")
  
        bp_plot
  
-      @lendist_unclipped = png2base64(@plot1)
-      @lendist_clipped   = png2base64(@plot3)
-      @scores_unclipped  = png2base64(@plot2)
-      @scores_clipped    = png2base64(@plot4)
-      @mean_scores       = png2base64(@plot5)
+      @lendist_unclipped  = png2base64(@plot1)
+      @lendist_clipped    = png2base64(@plot3)
+      @scores_unclipped   = png2base64(@plot2)
+      @scores_clipped     = png2base64(@plot4)
+      @mean_scores        = png2base64(@plot5)
+      @nucleotide_dist500 = png2base64(@plot6)
+      @nucleotide_dist50  = png2base64(@plot7)
      end
  
      def bp_plot
        STDERR.puts "Creating plots ... "
        system(
-        "read_sff -c -i #{@sff_file} |
+        "read_sff -m -i #{@sff_file} |
           progress_meter |
           plot_distribution -k SEQ_LEN -T 'Length Distribution - unclipped' -t png -o #{@plot1} |
-         plot_scores -T 'Mean Quality Scores - unclipped' -t png -o #{@plot2} |
+         plot_scores -c -T 'Mean Quality Scores - unclipped' -t png -o #{@plot2} |
           clip_seq |
           plot_distribution -k SEQ_LEN -T 'Length Distribution - clipped' -t png -o #{@plot3} |
-         plot_scores -T 'Mean Quality Scores - clipped' -t png -o #{@plot4} |
+         plot_scores -c -T 'Mean Quality Scores - clipped' -t png -o #{@plot4} |
           mean_scores |
           bin_vals -k SCORES_MEAN -b 5 |
-         plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} -x"
+         plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} |
+         extract_seq -l 500 |
+         plot_nucleotide_distribution -c -t png -o #{@plot6} |
+         extract_seq -l 50 |
+         plot_nucleotide_distribution -t png -o #{@plot7} -x"
        )
        STDERR.puts "done.\n"
      end
@@ -247,52 +238,6 @@ class Report
        end
      end
    end
-
-  class FreqTable
-    def initialize(sff_file, tmpdir)
-      @sff_file = sff_file
-      @tab_file = File.join(tmpdir, "freq.tab")
-
-      bp_frequency_table
-    end
-
-    def each
-      File.open(@tab_file, "r") do |ios|
-        while not ios.eof? do
-          fields = ios.readline.chomp.split("\t")
-          yield FreqRow.new(fields[0], fields[1], fields[2], fields[3], fields[4])
-        end
-      end
-    end
-
-    private
-
-    def bp_frequency_table
-      STDERR.puts "Creating residue frequency table ... "
-      system(
-        "read_sff -i #{@sff_file} |
-         progress_meter |
-         extract_seq -l 50 |
-         uppercase_seq |
-         create_weight_matrix -p |
-         flip_tab |
-         write_tab -o #{@tab_file} -x"
-      )
-      STDERR.puts "done.\n"
-    end
-
-    class FreqRow
-      attr_reader :res_A, :res_C, :res_G, :res_N, :res_T
-
-      def initialize(res_A, res_C, res_G, res_N, res_T)
-        @res_A = res_A
-        @res_C = res_C
-        @res_G = res_G
-        @res_N = res_N
-        @res_T = res_T
-      end
-    end
-  end
  end
  
  template = %{
@@ -348,18 +293,10 @@ template = %{
        <% end %>
        </table>
        <h2>Residue frequency analysis</h2>
-      <p>The below table contains the residue frequency (in percent) of the first 50 bases:</p>
-      <table>
-      <% @table_freq.each do |row| %>
-        <tr>
-        <td><%= row.res_A %></td>
-        <td><%= row.res_T %></td>
-        <td><%= row.res_C %></td>
-        <td><%= row.res_G %></td>
-        <td><%= row.res_N %></td>
-        </tr>
-      <% end %>
-      </table>
+      <p>Plot of nucleotide distribution in percent of the first 50 bases:</p>
+      <p><img alt="plot_nucleotide_distribution" src="<%= @plots.nucleotide_dist50 %>" width="600" /></p>
+      <p>Plot of nucleotide distribution in percent of the first 500 bases:</p>
+      <p><img alt="plot_nucleotide_distribution" src="<%= @plots.nucleotide_dist500 %>" width="600" /></p>
      </body>
    </html>
  }.gsub(/^\s+/, '')