@seq_analysis = SeqAnalyze.new(@sff_file, tmpdir)
@plots = PlotData.new(@sff_file, tmpdir)
@table_mid_join = MidTable.new(@sff_file, tmpdir)
- @table_freq = FreqTable.new(@sff_file, tmpdir)
end
# Support templating of member data.
def initialize(sff_file, tmpdir)
@sff_file = sff_file
- @out1_file = File.join(tmpdir, "out1.txt")
- @out2_file = File.join(tmpdir, "out2.txt")
+ @anal_file = File.join(tmpdir, "out1.txt")
@count = 0
@min = 0
@max = 0
bp_seq_analyze
parse_analyze_vals
- parse_mean_vals
end
private
system(
"read_sff -i #{@sff_file} |
progress_meter |
- analyze_vals -k SEQ -o #{@out1_file} |
analyze_seq |
- mean_vals -k 'GC%,HARD_MASK%,SOFT_MASK%' -o #{@out2_file} -x"
+ analyze_vals -k GC%,HARD_MASK%,SOFT_MASK% |
+ write_tab -o #{@anal_file} -x"
)
STDERR.puts "done.\n"
end
def parse_analyze_vals
- File.open(@out1_file, "r") do |ios|
+ File.open(@anal_file, "r") do |ios|
while not ios.eof?
- line = ios.readline.chomp
-
- case line
- when /COUNT\s+(\d+)/; then @count = $1
- when /MIN\s+(\d+)/; then @min = $1
- when /MAX\s+(\d+)/; then @max = $1
- when /MEAN\s+(\d+)/; then @mean = $1
- when /SUM\s+(\d+)/; then @bases = $1
- end
- end
- end
- end
-
- def parse_mean_vals
- File.open(@out2_file, "r") do |ios|
- while not ios.eof?
- line = ios.readline.chomp
-
- case line
- when /GC%_MEAN: (.+)/; then @gc = $1
- when /HARD_MASK%_MEAN: (.+)/; then @hard = $1
- when /SOFT_MASK%_MEAN: (.+)/; then @soft = $1
+ line = ios.readline.chomp
+ fields = line.split("\t")
+
+ case fields.first
+ when "SEQ" then @count, @min, @max, @sum, @mean = fields[2], fields[3], fields[4], fields[5], fields[6]
+ when "GC%" then @gc = fields[6]
+ when "HARD_MASK%" then @hard = fields[6]
+ when "SOFT_MASK%" then @soft = fields[6]
end
end
end
end
class PlotData
- attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores
+ attr_reader :lendist_unclipped, :lendist_clipped, :scores_unclipped, :scores_clipped, :mean_scores, :nucleotide_dist500, :nucleotide_dist50
def initialize(sff_file, tmpdir)
@sff_file = sff_file
@plot3 = File.join(tmpdir, "plot3.png")
@plot4 = File.join(tmpdir, "plot4.png")
@plot5 = File.join(tmpdir, "plot5.png")
+ @plot6 = File.join(tmpdir, "plot6.png")
+ @plot7 = File.join(tmpdir, "plot7.png")
bp_plot
- @lendist_unclipped = png2base64(@plot1)
- @lendist_clipped = png2base64(@plot3)
- @scores_unclipped = png2base64(@plot2)
- @scores_clipped = png2base64(@plot4)
- @mean_scores = png2base64(@plot5)
+ @lendist_unclipped = png2base64(@plot1)
+ @lendist_clipped = png2base64(@plot3)
+ @scores_unclipped = png2base64(@plot2)
+ @scores_clipped = png2base64(@plot4)
+ @mean_scores = png2base64(@plot5)
+ @nucleotide_dist500 = png2base64(@plot6)
+ @nucleotide_dist50 = png2base64(@plot7)
end
def bp_plot
STDERR.puts "Creating plots ... "
system(
- "read_sff -c -i #{@sff_file} |
+ "read_sff -m -i #{@sff_file} |
progress_meter |
plot_distribution -k SEQ_LEN -T 'Length Distribution - unclipped' -t png -o #{@plot1} |
- plot_scores -T 'Mean Quality Scores - unclipped' -t png -o #{@plot2} |
+ plot_scores -c -T 'Mean Quality Scores - unclipped' -t png -o #{@plot2} |
clip_seq |
plot_distribution -k SEQ_LEN -T 'Length Distribution - clipped' -t png -o #{@plot3} |
- plot_scores -T 'Mean Quality Scores - clipped' -t png -o #{@plot4} |
+ plot_scores -c -T 'Mean Quality Scores - clipped' -t png -o #{@plot4} |
mean_scores |
bin_vals -k SCORES_MEAN -b 5 |
- plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} -x"
+ plot_histogram -s num -k SCORES_MEAN_BIN -T 'Mean score bins' -X 'Bins (size 5)' -Y 'Count' -t png -o #{@plot5} |
+ extract_seq -l 500 |
+ plot_nucleotide_distribution -c -t png -o #{@plot6} |
+ extract_seq -l 50 |
+ plot_nucleotide_distribution -t png -o #{@plot7} -x"
)
STDERR.puts "done.\n"
end
end
end
end
-
- class FreqTable
- def initialize(sff_file, tmpdir)
- @sff_file = sff_file
- @tab_file = File.join(tmpdir, "freq.tab")
-
- bp_frequency_table
- end
-
- def each
- File.open(@tab_file, "r") do |ios|
- while not ios.eof? do
- fields = ios.readline.chomp.split("\t")
- yield FreqRow.new(fields[0], fields[1], fields[2], fields[3], fields[4])
- end
- end
- end
-
- private
-
- def bp_frequency_table
- STDERR.puts "Creating residue frequency table ... "
- system(
- "read_sff -i #{@sff_file} |
- progress_meter |
- extract_seq -l 50 |
- uppercase_seq |
- create_weight_matrix -p |
- flip_tab |
- write_tab -o #{@tab_file} -x"
- )
- STDERR.puts "done.\n"
- end
-
- class FreqRow
- attr_reader :res_A, :res_C, :res_G, :res_N, :res_T
-
- def initialize(res_A, res_C, res_G, res_N, res_T)
- @res_A = res_A
- @res_C = res_C
- @res_G = res_G
- @res_N = res_N
- @res_T = res_T
- end
- end
- end
end
template = %{
<% end %>
</table>
<h2>Residue frequency analysis</h2>
- <p>The below table contains the residue frequency (in percent) of the first 50 bases:</p>
- <table>
- <% @table_freq.each do |row| %>
- <tr>
- <td><%= row.res_A %></td>
- <td><%= row.res_T %></td>
- <td><%= row.res_C %></td>
- <td><%= row.res_G %></td>
- <td><%= row.res_N %></td>
- </tr>
- <% end %>
- </table>
+ <p>Plot of nucleotide distribution in percent of the first 50 bases:</p>
+ <p><img alt="plot_nucleotide_distribution" src="<%= @plots.nucleotide_dist50 %>" width="600" /></p>
+ <p>Plot of nucleotide distribution in percent of the first 500 bases:</p>
+ <p><img alt="plot_nucleotide_distribution" src="<%= @plots.nucleotide_dist500 %>" width="600" /></p>
</body>
</html>
}.gsub(/^\s+/, '')