From: martinahansen Date: Tue, 24 May 2011 13:36:23 +0000 (+0000) Subject: working on clean_reads scripts X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=17bba756995c2d3893ae61e2b0784bf0ad29c3c7;p=biopieces.git working on clean_reads scripts git-svn-id: http://biopieces.googlecode.com/svn/trunk@1424 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_scripts/clean_reads.sh b/bp_scripts/clean_reads.sh deleted file mode 100755 index 6929378..0000000 --- a/bp_scripts/clean_reads.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -# Remove adaptor from sequences and trim according to scores. -# Produces a number of plots during the process. - -# Usage: read_fastq -i test.fq | clean_reads.sh | write_fastq -xo clean.fq - -#adaptor='TCGTATGCCGTCTTCTGCTTG' # 454 adaptor -adaptor='AGATCGGAAGACACACGTCT' # Solexa adaptor -pid=$$ - -progress_meter | -plot_lendist -t post -o 00_remove_adaptor_lendist_seq_before.$pid.ps -k SEQ_LEN | -find_adaptor -a $adaptor -p | -plot_histogram -t post -o 01_remove_adaptor_histogram_adaptor_pos.$pid.ps -k ADAPTOR_POS -s num | -plot_lendist -t post -o 02_remove_adaptor_lendist_adaptor_len.$pid.ps -k ADAPTOR_LEN | -plot_lendist -t post -o 03_remove_adaptor_lendist_seq_after.$pid.ps -k SEQ_LEN | -analyze_vals -k ADAPTOR_POS,ADAPTOR_LEN -o 04_remove_adaptor_analyze_vals.$pid.txt | -clip_adaptor | -plot_scores -t post -o 04_trim_seq_scores_pretrim.$pid.ps | -plot_lendist -k SEQ_LEN -t post -o 05_trim_seq_lendist_pretrim.$pid.ps | -trim_seq | -grab -e "SEQ_LEN>=30" | -mean_scores -l | -grab -e "SCORES_LOCAL_MEAN>=15" | -plot_scores -t post -o 06_trim_seq_scores_posttrim.$pid.ps | -plot_lendist -k SEQ_LEN -t post -o 07_trim_seq_lendist_posttrim.$pid.ps diff --git a/bp_scripts/clean_reads_454.sh b/bp_scripts/clean_reads_454.sh new file mode 100755 index 0000000..70e05e9 --- /dev/null +++ b/bp_scripts/clean_reads_454.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# Remove adaptor from sequences and trim according to scores. 
+# Produces a number of plots during the process. + +# Usage: read_fastq -i test.fq | clean_reads_454.sh | write_fastq -xo clean.fq + +adaptor='TCGTATGCCGTCTTCTGCTTG' # 454 adaptor +#adaptor='AGATCGGAAGACACACGTCT' # Solexa adaptor +pid=$$ + +progress_meter | +plot_lendist -t post -o 00_remove_adaptor_lendist_seq_before.$pid.ps -k SEQ_LEN | +find_adaptor -a $adaptor -p | +plot_histogram -t post -o 01_remove_adaptor_histogram_adaptor_pos.$pid.ps -k ADAPTOR_POS -s num | +plot_lendist -t post -o 02_remove_adaptor_lendist_adaptor_len.$pid.ps -k ADAPTOR_LEN | +plot_lendist -t post -o 03_remove_adaptor_lendist_seq_after.$pid.ps -k SEQ_LEN | +analyze_vals -k ADAPTOR_POS,ADAPTOR_LEN -o 04_remove_adaptor_analyze_vals.$pid.txt | +clip_adaptor | +plot_scores -t post -o 04_trim_seq_scores_pretrim.$pid.ps | +plot_lendist -k SEQ_LEN -t post -o 05_trim_seq_lendist_pretrim.$pid.ps | +trim_seq | +grab -e "SEQ_LEN>=50" | +mean_scores -l | +grab -e "SCORES_LOCAL_MEAN>=15" | +plot_scores -t post -o 06_trim_seq_scores_posttrim.$pid.ps | +plot_lendist -k SEQ_LEN -t post -o 07_trim_seq_lendist_posttrim.$pid.ps diff --git a/bp_scripts/clean_reads_illumina.sh b/bp_scripts/clean_reads_illumina.sh new file mode 100755 index 0000000..6929378 --- /dev/null +++ b/bp_scripts/clean_reads_illumina.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# Remove adaptor from sequences and trim according to scores. +# Produces a number of plots during the process. 
+ +# Usage: read_fastq -i test.fq | clean_reads_illumina.sh | write_fastq -xo clean.fq + +#adaptor='TCGTATGCCGTCTTCTGCTTG' # 454 adaptor +adaptor='AGATCGGAAGACACACGTCT' # Solexa adaptor +pid=$$ + +progress_meter | +plot_lendist -t post -o 00_remove_adaptor_lendist_seq_before.$pid.ps -k SEQ_LEN | +find_adaptor -a $adaptor -p | +plot_histogram -t post -o 01_remove_adaptor_histogram_adaptor_pos.$pid.ps -k ADAPTOR_POS -s num | +plot_lendist -t post -o 02_remove_adaptor_lendist_adaptor_len.$pid.ps -k ADAPTOR_LEN | +plot_lendist -t post -o 03_remove_adaptor_lendist_seq_after.$pid.ps -k SEQ_LEN | +analyze_vals -k ADAPTOR_POS,ADAPTOR_LEN -o 04_remove_adaptor_analyze_vals.$pid.txt | +clip_adaptor | +plot_scores -t post -o 04_trim_seq_scores_pretrim.$pid.ps | +plot_lendist -k SEQ_LEN -t post -o 05_trim_seq_lendist_pretrim.$pid.ps | +trim_seq | +grab -e "SEQ_LEN>=30" | +mean_scores -l | +grab -e "SCORES_LOCAL_MEAN>=15" | +plot_scores -t post -o 06_trim_seq_scores_posttrim.$pid.ps | +plot_lendist -k SEQ_LEN -t post -o 07_trim_seq_lendist_posttrim.$pid.ps