X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_scripts%2FQA_Illumina_report.rb;h=420294c08662e09d34fa0fa79b16450a3583653a;hb=ddbb577ec991c2a466b6c1230cc241f649753f09;hp=2655b10a4856076fa41a88254db65e8e7e811459;hpb=fe7ad43a97afc0f3a12d789adc14390a50733315;p=biopieces.git diff --git a/bp_scripts/QA_Illumina_report.rb b/bp_scripts/QA_Illumina_report.rb index 2655b10..420294c 100755 --- a/bp_scripts/QA_Illumina_report.rb +++ b/bp_scripts/QA_Illumina_report.rb @@ -66,11 +66,11 @@ scores_bin_file = File.join(tmpdir, 'scores_bin.png') STDERR.puts "Analyzing sequences ... " system( - "read_fastq -i #{seq_file} | + "read_fastq -e illumina1.8 -i #{seq_file} | progress_meter | analyze_vals -k SEQ -o #{analyze_vals_file} | trim_seq -l 3 -m 25 | - grab -e 'SEQ_LEN > 0' | + grab -e 'SEQ_LEN > 20' | analyze_vals -k SEQ -o #{analyze_vals_trim_file} | find_adaptor -l 6 -L 6 -f ACACGACGCTCTTCCGATCT -r AGATCGGAAGAGCACACGTC | clip_adaptor | @@ -78,7 +78,7 @@ system( analyze_vals -k SEQ -o #{analyze_vals_trim_noadapt_file} | plot_distribution -k SEQ_LEN -T 'Sequence length distribution' -X 'Sequence length' -t png -o #{lendist_file} | plot_scores -c -t png -o #{scores_file} | - plot_nucleotide_distribution -t png -o #{nucdist_file} | + plot_nucleotide_distribution -c -t png -o #{nucdist_file} | bin_vals -k SEQ_LEN -b 25 | plot_distribution -T '25 bases bin sequence length distribution' -X 'Sequence length' -k SEQ_LEN_BIN -t png -o #{lendist_bin_file} | mean_scores | @@ -112,7 +112,7 @@ template = %{

Sequence trimming was performed by removing from the ends all residues until 3 consecutive

residues with quality score larger than or equal to 25.

-

All plots are after sequence trimming.

+

All plots are after sequence trimming and adaptor removal.

Sequence length distribution