X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_scripts%2FQA_Illumina_report.rb;h=6e0ba0a4c10c9ec695daf0c81dd1fb0c5f591c6c;hb=2f0fd91b461033529a4a72e161bd133252a22eb6;hp=2655b10a4856076fa41a88254db65e8e7e811459;hpb=fe7ad43a97afc0f3a12d789adc14390a50733315;p=biopieces.git diff --git a/bp_scripts/QA_Illumina_report.rb b/bp_scripts/QA_Illumina_report.rb index 2655b10..6e0ba0a 100755 --- a/bp_scripts/QA_Illumina_report.rb +++ b/bp_scripts/QA_Illumina_report.rb @@ -66,11 +66,11 @@ scores_bin_file = File.join(tmpdir, 'scores_bin.png') STDERR.puts "Analyzing sequences ... " system( - "read_fastq -i #{seq_file} | + "read_fastq -e base_33 -i #{seq_file} | progress_meter | analyze_vals -k SEQ -o #{analyze_vals_file} | trim_seq -l 3 -m 25 | - grab -e 'SEQ_LEN > 0' | + grab -e 'SEQ_LEN > 20' | analyze_vals -k SEQ -o #{analyze_vals_trim_file} | find_adaptor -l 6 -L 6 -f ACACGACGCTCTTCCGATCT -r AGATCGGAAGAGCACACGTC | clip_adaptor | @@ -78,7 +78,7 @@ system( analyze_vals -k SEQ -o #{analyze_vals_trim_noadapt_file} | plot_distribution -k SEQ_LEN -T 'Sequence length distribution' -X 'Sequence length' -t png -o #{lendist_file} | plot_scores -c -t png -o #{scores_file} | - plot_nucleotide_distribution -t png -o #{nucdist_file} | + plot_nucleotide_distribution -c -t png -o #{nucdist_file} | bin_vals -k SEQ_LEN -b 25 | plot_distribution -T '25 bases bin sequence length distribution' -X 'Sequence length' -k SEQ_LEN_BIN -t png -o #{lendist_bin_file} | mean_scores | @@ -112,7 +112,7 @@ template = %{

Sequence trimming was performed by removing residues from the ends until 3 consecutive

residues with a quality score greater than or equal to 25 were encountered.

-

All plots are after sequence trimming.

+

All plots are after sequence trimming and adaptor removal.

Sequence length distribution