From: martinahansen Date: Fri, 23 Nov 2012 20:46:50 +0000 (+0000) Subject: added QA_Illumina_report.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=2c9aea983d8a632d93316c073031c797747514e5;p=biopieces.git added QA_Illumina_report.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1993 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_scripts/QA_Illumina_report.rb b/bp_scripts/QA_Illumina_report.rb new file mode 100755 index 0000000..c7061a5 --- /dev/null +++ b/bp_scripts/QA_Illumina_report.rb @@ -0,0 +1,126 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2012 Martin A. Hansen (mail@maasha.dk). + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +require 'pp' +require 'tmpdir' +require 'base64' +require 'erb' + +def parse_analysis(file) + data = {} + + File.open(file, 'r') do |ios| + ios.each do |line| + key, val = line.chomp.split(' ') + data[key] = val.to_i; + end + end + + return data +end + +def png2base64(file) + png = "" + + File.open(file, "r") do |ios| + png = ios.read + end + + "data:image/png;base64," + Base64.encode64(png) +end + +if ARGV.empty? + $stderr.puts "Usage: QA_Illumina_report.rb > " + exit +end + +tmpdir = Dir.mktmpdir +seq_file = ARGV.shift + +analyze_vals_file = File.join(tmpdir, 'analyze_vals.txt') +analyze_vals_trim_file = File.join(tmpdir, 'analyze_vals_trim.txt') +lendist_file = File.join(tmpdir, 'lendist.png') +scores_file = File.join(tmpdir, 'scores.png') +nucdist_file = File.join(tmpdir, 'nucdist.png') +lendist_bin_file = File.join(tmpdir, 'lendist_bin.png') +scores_bin_file = File.join(tmpdir, 'scores_bin.png') + +STDERR.puts "Analyzing sequences ... " + +system( + "read_fastq -i #{seq_file} | + progress_meter | + analyze_vals -k SEQ -o #{analyze_vals_file} | + trim_seq -l 3 -m 25 | + grab -e 'SEQ_LEN > 0' | + analyze_vals -k SEQ -o #{analyze_vals_trim_file} | + plot_distribution -k SEQ_LEN -T 'Sequence length distribution' -X 'Sequence length' -t png -o #{lendist_file} | + plot_scores -c -t png -o #{scores_file} | + plot_nucleotide_distribution -t png -o #{nucdist_file} | + bin_vals -k SEQ_LEN -b 50 | + plot_distribution -T '50 bases bin sequence length distribution' -X 'Sequence length' -k SEQ_LEN_BIN -t png -o #{lendist_bin_file} | + mean_scores | + bin_vals -k SCORES_MEAN -b 5 | + plot_distribution -k SCORES_MEAN_BIN -T '5 bin mean score distribution' -X 'Mean scores' -t png -o #{scores_bin_file} -x" +) + +STDERR.puts "done.\n" + +analysis1 = parse_analysis(analyze_vals_file) +analysis2 = parse_analysis(analyze_vals_trim_file) + +template = %{ + + + QA Illumina Report + + +

QA Illumina Report

+

Date: #{Time.now}

+

File: <%= seq_file %>

+

Sequence composition

+ + + + + + + +
Before trimmingAfter trimming
Number of sequences<%= analysis1['COUNT'] %><%= analysis2['COUNT'] %>
Number of bases<%= analysis1['SUM'] %><%= analysis2['SUM'] %>
Min sequence length<%= analysis1['MIN'] %><%= analysis2['MIN'] %>
Max sequence length<%= analysis1['MAX'] %><%= analysis2['MAX'] %>
Max sequence length<%= analysis1['MEAN'] %><%= analysis2['MEAN'] %>
+

Sequence trimming was performed by removing from the ends all residues until 3 consecutive

+

residues with quality score larger than or equal to 25.

+

All plots are after sequence trimming.

+

Sequence length distribution

+

+

+

Sequence quality scores

+

+

+

Sequence nucleotide distribution

+

+ + +}.gsub(/^\s+/, '') + +html = ERB.new(template) + +puts html.result(binding) + +__END__