From: martinahansen
Date: Tue, 24 May 2011 09:11:38 +0000 (+0000)
Subject: polished bp_scripts
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=213bbdfb7de5f49821a6ca8edda36d8ce4987ed3;p=biopieces.git

polished bp_scripts

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1420 74ccb610-7750-0410-82ae-013aeee3265d
---

diff --git a/bp_scripts/clean_reads.sh b/bp_scripts/clean_reads.sh
new file mode 100755
index 0000000..6929378
--- /dev/null
+++ b/bp_scripts/clean_reads.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# Remove adaptor from sequences and trim according to scores.
+# Produces a number of plots during the process.
+
+# Usage: read_fastq -i test.fq | clean_reads.sh | write_fastq -xo clean.fq
+
+#adaptor='TCGTATGCCGTCTTCTGCTTG' # 454 adaptor
+adaptor='AGATCGGAAGACACACGTCT' # Solexa adaptor
+pid=$$
+
+progress_meter |
+plot_lendist -t post -o 00_remove_adaptor_lendist_seq_before.$pid.ps -k SEQ_LEN |
+find_adaptor -a $adaptor -p |
+plot_histogram -t post -o 01_remove_adaptor_histogram_adaptor_pos.$pid.ps -k ADAPTOR_POS -s num |
+plot_lendist -t post -o 02_remove_adaptor_lendist_adaptor_len.$pid.ps -k ADAPTOR_LEN |
+plot_lendist -t post -o 03_remove_adaptor_lendist_seq_after.$pid.ps -k SEQ_LEN |
+analyze_vals -k ADAPTOR_POS,ADAPTOR_LEN -o 04_remove_adaptor_analyze_vals.$pid.txt |
+clip_adaptor |
+plot_scores -t post -o 04_trim_seq_scores_pretrim.$pid.ps |
+plot_lendist -k SEQ_LEN -t post -o 05_trim_seq_lendist_pretrim.$pid.ps |
+trim_seq |
+grab -e "SEQ_LEN>=30" |
+mean_scores -l |
+grab -e "SCORES_LOCAL_MEAN>=15" |
+plot_scores -t post -o 06_trim_seq_scores_posttrim.$pid.ps |
+plot_lendist -k SEQ_LEN -t post -o 07_trim_seq_lendist_posttrim.$pid.ps
diff --git a/bp_scripts/remove_adaptor_454.sh b/bp_scripts/remove_adaptor_454.sh
index 34013b1..5088d47 100755
--- a/bp_scripts/remove_adaptor_454.sh
+++ b/bp_scripts/remove_adaptor_454.sh
@@ -2,14 +2,13 @@
 
 # Remove adaptor from sequences and produce a number of plots.
 
-# Usage: read_fastq -i test.fq | remove_adaptor.sh | write_fastq -xo test_no_adaptor.fq
+# Usage: read_fastq -i test.fq | remove_adaptor_454.sh | write_fastq -xo test_no_adaptor.fq
 
 adaptor='TCGTATGCCGTCTTCTGCTTG' # 454 adaptor
 pid=$$
 
-#find_adaptor -a $adaptor |
 plot_lendist -t post -o 00_remove_adaptor_lendist_seq_before.$pid.ps -k SEQ_LEN |
-parallel -k --blocksize 50M --pipe --recend "\n---\n" "nice -n 19 find_adaptor -a $adaptor" |
+find_adaptor -a $adaptor -p |
 plot_histogram -t post -o 01_remove_adaptor_histogram_adaptor_pos.$pid.ps -k ADAPTOR_POS -s num |
 plot_lendist -t post -o 02_remove_adaptor_lendist_adaptor_len.$pid.ps -k ADAPTOR_LEN |
 plot_lendist -t post -o 03_remove_adaptor_lendist_seq_after.$pid.ps -k SEQ_LEN |
diff --git a/bp_scripts/trim_seq.sh b/bp_scripts/trim_seq.sh
index 60839aa..7866c74 100755
--- a/bp_scripts/trim_seq.sh
+++ b/bp_scripts/trim_seq.sh
@@ -5,11 +5,11 @@
 
 pid=$$
 
-plot_scores -t post -o trim_seq_scores_pretrim.$pid.ps |
-plot_lendist -k SEQ_LEN -t post -o trim_seq_lendist_pretrim.$pid.ps |
+plot_scores -t post -o 00_trim_seq_scores_pretrim.$pid.ps |
+plot_lendist -k SEQ_LEN -t post -o 01_trim_seq_lendist_pretrim.$pid.ps |
 trim_seq |
 grab -e "SEQ_LEN>=30" |
 mean_scores -l |
 grab -e "SCORES_LOCAL_MEAN>=15" |
-plot_scores -t post -o trim_seq_scores_posttrim.$pid.ps |
-plot_lendist -k SEQ_LEN -t post -o trim_seq_lendist_posttrim.$pid.ps
+plot_scores -t post -o 02_trim_seq_scores_posttrim.$pid.ps |
+plot_lendist -k SEQ_LEN -t post -o 03_trim_seq_lendist_posttrim.$pid.ps