From: martinahansen Date: Mon, 26 Nov 2012 16:26:00 +0000 (+0000) Subject: added adaptor removal to QA X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=fe7ad43a97afc0f3a12d789adc14390a50733315;p=biopieces.git added adaptor removal to QA git-svn-id: http://biopieces.googlecode.com/svn/trunk@1997 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_scripts/QA_Illumina_report.rb b/bp_scripts/QA_Illumina_report.rb index d82c0cf..2655b10 100755 --- a/bp_scripts/QA_Illumina_report.rb +++ b/bp_scripts/QA_Illumina_report.rb @@ -29,7 +29,7 @@ def parse_analysis(file) File.open(file, 'r') do |ios| ios.each do |line| key, val = line.chomp.split(' ') - data[key] = val.to_i; + data[key] = val.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse end end @@ -54,13 +54,14 @@ end tmpdir = Dir.mktmpdir seq_file = ARGV.shift -analyze_vals_file = File.join(tmpdir, 'analyze_vals.txt') -analyze_vals_trim_file = File.join(tmpdir, 'analyze_vals_trim.txt') -lendist_file = File.join(tmpdir, 'lendist.png') -scores_file = File.join(tmpdir, 'scores.png') -nucdist_file = File.join(tmpdir, 'nucdist.png') -lendist_bin_file = File.join(tmpdir, 'lendist_bin.png') -scores_bin_file = File.join(tmpdir, 'scores_bin.png') +analyze_vals_file = File.join(tmpdir, 'analyze_vals.txt') +analyze_vals_trim_file = File.join(tmpdir, 'analyze_vals_trim_noadapt.txt') +analyze_vals_trim_noadapt_file = File.join(tmpdir, 'analyze_vals_trim.txt') +lendist_file = File.join(tmpdir, 'lendist.png') +scores_file = File.join(tmpdir, 'scores.png') +nucdist_file = File.join(tmpdir, 'nucdist.png') +lendist_bin_file = File.join(tmpdir, 'lendist_bin.png') +scores_bin_file = File.join(tmpdir, 'scores_bin.png') STDERR.puts "Analyzing sequences ... " @@ -71,6 +72,10 @@ system( trim_seq -l 3 -m 25 | grab -e 'SEQ_LEN > 0' | analyze_vals -k SEQ -o #{analyze_vals_trim_file} | + find_adaptor -l 6 -L 6 -f ACACGACGCTCTTCCGATCT -r AGATCGGAAGAGCACACGTC | + clip_adaptor | + grab -e 'SEQ_LEN > 0' | + analyze_vals -k SEQ -o #{analyze_vals_trim_noadapt_file} | plot_distribution -k SEQ_LEN -T 'Sequence length distribution' -X 'Sequence length' -t png -o #{lendist_file} | plot_scores -c -t png -o #{scores_file} | plot_nucleotide_distribution -t png -o #{nucdist_file} | @@ -85,6 +90,7 @@ STDERR.puts "done.\n" analysis1 = parse_analysis(analyze_vals_file) analysis2 = parse_analysis(analyze_vals_trim_file) +analysis3 = parse_analysis(analyze_vals_trim_noadapt_file) template = %{ @@ -97,12 +103,12 @@ template = %{

File: <%= seq_file %>

Sequence composition

- - - - - - + + + + + +
Before trimmingAfter trimming
Number of sequences<%= analysis1['COUNT'] %><%= analysis2['COUNT'] %>
Number of bases<%= analysis1['SUM'] %><%= analysis2['SUM'] %>
Min sequence length<%= analysis1['MIN'] %><%= analysis2['MIN'] %>
Max sequence length<%= analysis1['MAX'] %><%= analysis2['MAX'] %>
Mean sequence length<%= analysis1['MEAN'] %><%= analysis2['MEAN'] %>
Before trimmingAfter trimmingAfter adaptor removal
Number of sequences<%= analysis1['COUNT'] %><%= analysis2['COUNT'] %><%= analysis3['COUNT'] %>
Number of bases<%= analysis1['SUM'] %><%= analysis2['SUM'] %><%= analysis3['SUM'] %>
Min sequence length<%= analysis1['MIN'] %><%= analysis2['MIN'] %><%= analysis3['MIN'] %>
Max sequence length<%= analysis1['MAX'] %><%= analysis2['MAX'] %><%= analysis3['MAX'] %>
Mean sequence length<%= analysis1['MEAN'] %><%= analysis2['MEAN'] %><%= analysis3['MEAN'] %>

Sequence trimming was performed by removing from the ends all residues until 3 consecutive

residues with quality score larger than or equal to 25.