X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_scripts%2FQA_454_report.rb;h=905bfc33fef791be3e6cffde9c4388210dcaa1f8;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=362c728289c406fea726167ee470a25578ec7650;hpb=af576f0bbbb660f3943d72e7b67fe1d77c76f706;p=biopieces.git diff --git a/bp_scripts/QA_454_report.rb b/bp_scripts/QA_454_report.rb index 362c728..905bfc3 100755 --- a/bp_scripts/QA_454_report.rb +++ b/bp_scripts/QA_454_report.rb @@ -45,8 +45,7 @@ class Report def initialize(sff_file, tmpdir) @sff_file = sff_file - @out1_file = File.join(tmpdir, "out1.txt") - @out2_file = File.join(tmpdir, "out2.txt") + @anal_file = File.join(tmpdir, "out1.txt") @count = 0 @min = 0 @max = 0 @@ -58,7 +57,6 @@ class Report bp_seq_analyze parse_analyze_vals - parse_mean_vals end private @@ -68,38 +66,24 @@ class Report system( "read_sff -i #{@sff_file} | progress_meter | - analyze_vals -k SEQ -o #{@out1_file} | analyze_seq | - mean_vals -k 'GC%,HARD_MASK%,SOFT_MASK%' -o #{@out2_file} -x" + analyze_vals -k SEQ,GC%,HARD_MASK%,SOFT_MASK% -x | + write_tab -o #{@anal_file} -x" ) STDERR.puts "done.\n" end def parse_analyze_vals - File.open(@out1_file, "r") do |ios| + File.open(@anal_file, "r") do |ios| while not ios.eof? - line = ios.readline.chomp - - case line - when /COUNT\s+(\d+)/; then @count = $1 - when /MIN\s+(\d+)/; then @min = $1 - when /MAX\s+(\d+)/; then @max = $1 - when /MEAN\s+(\d+)/; then @mean = $1 - when /SUM\s+(\d+)/; then @bases = $1 - end - end - end - end - - def parse_mean_vals - File.open(@out2_file, "r") do |ios| - while not ios.eof? - line = ios.readline.chomp - - case line - when /GC%_MEAN: (.+)/; then @gc = $1 - when /HARD_MASK%_MEAN: (.+)/; then @hard = $1 - when /SOFT_MASK%_MEAN: (.+)/; then @soft = $1 + line = ios.readline.chomp + fields = line.split("\t") + + case fields.first + when "SEQ" then @count, @min, @max, @bases, @mean = fields[2 .. 6] + when "GC%" then @gc = fields[6] + when "HARD_MASK%" then @hard = fields[6] + when "SOFT_MASK%" then @soft = fields[6] end end end @@ -107,7 +91,7 @@ class Report end class PlotData - attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores, :nucleotide_dist + attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores, :nucleotide_dist500, :nucleotide_dist50 def initialize(sff_file, tmpdir) @sff_file = sff_file @@ -117,15 +101,17 @@ class Report @plot4 = File.join(tmpdir, "plot4.png") @plot5 = File.join(tmpdir, "plot5.png") @plot6 = File.join(tmpdir, "plot6.png") + @plot7 = File.join(tmpdir, "plot7.png") bp_plot - @lendist_unclipped = png2base64(@plot1) - @lendist_clipped = png2base64(@plot3) - @scores_unclipped = png2base64(@plot2) - @scores_clipped = png2base64(@plot4) - @mean_scores = png2base64(@plot5) - @nucleotide_dist = png2base64(@plot6) + @lendist_unclipped = png2base64(@plot1) + @lendist_clipped = png2base64(@plot3) + @scores_unclipped = png2base64(@plot2) + @scores_clipped = png2base64(@plot4) + @mean_scores = png2base64(@plot5) + @nucleotide_dist500 = png2base64(@plot6) + @nucleotide_dist50 = png2base64(@plot7) end def bp_plot @@ -141,8 +127,10 @@ class Report mean_scores | bin_vals -k SCORES_MEAN -b 5 | plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} | + extract_seq -l 500 | + plot_nucleotide_distribution -c -t png -o #{@plot6} | extract_seq -l 50 | - plot_nucleotide_distribution -t png -o #{@plot6} -x" + plot_nucleotide_distribution -t png -o #{@plot7} -x" ) STDERR.puts "done.\n" end @@ -306,7 +294,9 @@ template = %{

Residue frequency analysis

Plot of nucleotide distribution in percent of the first 50 bases:

-

plot_nucleotide_distribution

+

plot_nucleotide_distribution

+

Plot of nucleotide distribution in percent of the first 500 bases:

+

plot_nucleotide_distribution

}.gsub(/^\s+/, '')