X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_scripts%2FQA_454_report.rb;h=ea06bddb3a1c5b29f3ad2c336f8f1641cf94a099;hb=dd9446a30cfe771afdf6aa51dd5cc38307971e6a;hp=1201389ea785e4fa881f189f1c6af2461c20504c;hpb=46c9296a4c05233a498f4b21d7562ae84e787682;p=biopieces.git diff --git a/bp_scripts/QA_454_report.rb b/bp_scripts/QA_454_report.rb index 1201389..ea06bdd 100755 --- a/bp_scripts/QA_454_report.rb +++ b/bp_scripts/QA_454_report.rb @@ -31,7 +31,6 @@ class Report @seq_analysis = SeqAnalyze.new(@sff_file, tmpdir) @plots = PlotData.new(@sff_file, tmpdir) @table_mid_join = MidTable.new(@sff_file, tmpdir) - @table_freq = FreqTable.new(@sff_file, tmpdir) end # Support templating of member data. @@ -46,8 +45,7 @@ class Report def initialize(sff_file, tmpdir) @sff_file = sff_file - @out1_file = File.join(tmpdir, "out1.txt") - @out2_file = File.join(tmpdir, "out2.txt") + @anal_file = File.join(tmpdir, "out1.txt") @count = 0 @min = 0 @max = 0 @@ -59,7 +57,6 @@ class Report bp_seq_analyze parse_analyze_vals - parse_mean_vals end private @@ -69,24 +66,27 @@ class Report system( "read_sff -i #{@sff_file} | progress_meter | - analyze_vals -k SEQ -o #{@out1_file} | analyze_seq | - mean_vals -k 'GC%,HARD_MASK%,SOFT_MASK%' -o #{@out2_file} -x" + analyze_vals -k GC%,HARD_MASK%,SOFT_MASK% | + write_tab -co #{@anal_file} -x" ) STDERR.puts "done.\n" end def parse_analyze_vals - File.open(@out1_file, "r") do |ios| + File.open(@anal_file, "r") do |ios| while not ios.eof? line = ios.readline.chomp case line - when /COUNT\s+(\d+)/; then @count = $1 - when /MIN\s+(\d+)/; then @min = $1 - when /MAX\s+(\d+)/; then @max = $1 - when /MEAN\s+(\d+)/; then @mean = $1 - when /SUM\s+(\d+)/; then @bases = $1 + when /COUNT\s+(\d+)/ then @count = $1 + when /MIN\s+(\d+)/ then @min = $1 + when /MAX\s+(\d+)/ then @max = $1 + when /MEAN\s+(\d+)/ then @mean = $1 + when /SUM\s+(\d+)/ then @bases = $1 + when /GC%_MEAN:\s+(.+)/ then @gc = $1 + when /HARD_MASK%_MEAN:\s+(.+)/ then @hard = $1 + when /SOFT_MASK%_MEAN:\s+(.+)/ then @soft = $1 end end end @@ -108,7 +108,7 @@ class Report end class PlotData - attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores + attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores, :nucleotide_dist500, :nucleotide_dist50 def initialize(sff_file, tmpdir) @sff_file = sff_file @@ -117,29 +117,37 @@ class Report @plot3 = File.join(tmpdir, "plot3.png") @plot4 = File.join(tmpdir, "plot4.png") @plot5 = File.join(tmpdir, "plot5.png") + @plot6 = File.join(tmpdir, "plot6.png") + @plot7 = File.join(tmpdir, "plot7.png") bp_plot - @lendist_unclipped = png2base64(@plot1) - @lendist_clipped = png2base64(@plot3) - @scores_unclipped = png2base64(@plot2) - @scores_clipped = png2base64(@plot4) - @mean_scores = png2base64(@plot5) + @lendist_unclipped = png2base64(@plot1) + @lendist_clipped = png2base64(@plot3) + @scores_unclipped = png2base64(@plot2) + @scores_clipped = png2base64(@plot4) + @mean_scores = png2base64(@plot5) + @nucleotide_dist500 = png2base64(@plot6) + @nucleotide_dist50 = png2base64(@plot7) end def bp_plot STDERR.puts "Creating plots ... " system( - "read_sff -c -i #{@sff_file} | + "read_sff -m -i #{@sff_file} | progress_meter | plot_distribution -k SEQ_LEN -T 'Length Distribution - unclipped' -t png -o #{@plot1} | - plot_scores -T 'Mean Quality Scores - unclipped' -t png -o #{@plot2} | + plot_scores -c -T 'Mean Quality Scores - unclipped' -t png -o #{@plot2} | clip_seq | plot_distribution -k SEQ_LEN -T 'Length Distribution - clipped' -t png -o #{@plot3} | - plot_scores -T 'Mean Quality Scores - clipped' -t png -o #{@plot4} | + plot_scores -c -T 'Mean Quality Scores - clipped' -t png -o #{@plot4} | mean_scores | bin_vals -k SCORES_MEAN -b 5 | - plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} -x" + plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} | + extract_seq -l 500 | + plot_nucleotide_distribution -c -t png -o #{@plot6} | + extract_seq -l 50 | + plot_nucleotide_distribution -t png -o #{@plot7} -x" ) STDERR.puts "done.\n" end @@ -247,52 +255,6 @@ class Report end end end - - class FreqTable - def initialize(sff_file, tmpdir) - @sff_file = sff_file - @tab_file = File.join(tmpdir, "freq.tab") - - bp_frequency_table - end - - def each - File.open(@tab_file, "r") do |ios| - while not ios.eof? do - fields = ios.readline.chomp.split("\t") - yield FreqRow.new(fields[0], fields[1], fields[2], fields[3], fields[4]) - end - end - end - - private - - def bp_frequency_table - STDERR.puts "Creating residue frequency table ... " - system( - "read_sff -i #{@sff_file} | - progress_meter | - extract_seq -l 50 | - uppercase_seq | - create_weight_matrix -p | - flip_tab | - write_tab -o #{@tab_file} -x" - ) - STDERR.puts "done.\n" - end - - class FreqRow - attr_reader :res_A, :res_C, :res_G, :res_N, :res_T - - def initialize(res_A, res_C, res_G, res_N, res_T) - @res_A = res_A - @res_C = res_C - @res_G = res_G - @res_N = res_N - @res_T = res_T - end - end - end end template = %{ @@ -348,18 +310,10 @@ template = %{ <% end %>

Residue frequency analysis

-

The below table contains the residue frequency (in percent) of the first 50 bases:

- - <% @table_freq.each do |row| %> - - - - - - - - <% end %> -
<%= row.res_A %><%= row.res_T %><%= row.res_C %><%= row.res_G %><%= row.res_N %>
+

Plot of nucleotide distribution in percent of the first 50 bases:

+

plot_nucleotide_distribution

+

Plot of nucleotide distribution in percent of the first 500 bases:

+

plot_nucleotide_distribution

}.gsub(/^\s+/, '')