From: martinahansen Date: Thu, 17 Jul 2008 02:53:58 +0000 (+0000) Subject: cleaning bp_usage X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=5b74d4321dc81498ba326d3abc50363e27cf5025;p=biopieces.git cleaning bp_usage git-svn-id: http://biopieces.googlecode.com/svn/trunk@161 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_usage/read_soft b/bp_usage/read_soft deleted file mode 100644 index c35e4ef..0000000 --- a/bp_usage/read_soft +++ /dev/null @@ -1,22 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: January 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Read data in GEO soft format. NCBI's deep sequencing and micro array data format. - -Usage: $script [options] -i - -Options: [-i | --data_in=] - Read input data from file. -Options: [-n | --num=] - Limit number of records to read and output. -Options: [-I | --stream_in=] - Read input stream from file - Default=STDIN -Options: [-O | --stream_out=] - Write output stream to file - Default=STDOUT - -Examples: $script -i GSE6734_family.soft - -Keys out: SAMPLE_TITLE - Title of sample. -Keys out: SEQ_NAME - Sequence name composed of Platform Series ID, Sample GEO accession, Sequence number in current experiment, and read count. -Keys out: SEQ - Sequence. diff --git a/bp_usage/read_solexa b/bp_usage/read_solexa deleted file mode 100644 index 7b947e7..0000000 --- a/bp_usage/read_solexa +++ /dev/null @@ -1,28 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: April 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Read Solexa deep sequencing output files. Lowercase sequence indicates low quality. - -Usage: $script [options] -i - -Options: [-i | --data_in=] - Comma separated list of files or glob expression to read. 
-Options: [-n | --num=] - Limit number of records to read. -Options: [-q | --quality=] - Lowercase nucleotide with quality score below this limit (min:0 max:40) - Default=20 -Options: [-I | --stream_in=] - Read input stream from file - Default=STDIN -Options: [-O | --stream_out=] - Write output stream to file - Default=STDOUT - -Examples: $script -i test.solexa - Read Solexa entries from file. -Examples: $script -i test1.fna,test2.solexa - Read Solexa entries from files. -Examples: $script -i '*.solexa' - Read Solexa entries from files. -Examples: $script -i test.solexa -n 10 - Read first 10 Solexa entries from file. -Examples: $script -i test.solexa -q 10 - Change quality score threshold to 10. - -Keys out: SEQ_NAME - Name of sequence. -Keys out: SEQ - Sequence. -Keys out: SEQ_LEN - Length of sequence. -Keys out: SCORE_MEAN - Mean quality score. diff --git a/bp_usage/read_solid b/bp_usage/read_solid deleted file mode 100644 index 765d4f1..0000000 --- a/bp_usage/read_solid +++ /dev/null @@ -1,30 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: April 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Read Solid sequence files with Name, Sequence and Quality. - -Usage: $script [options] -i - -Options: [-i | --data_in=] - Comma separated list of files or glob expression to read. -Options: [-n | --num=] - Limit number of records to read. -Options: [-q | --quality=] - Lowercase nucleotide with quality score below this limit (min:0 max:40) - Default=20 -Options: [-I | --stream_in=] - Read input stream from file - Default=STDIN -Options: [-O | --stream_out=] - Write output stream to file - Default=STDOUT - -Examples: $script -i test.solid - Read Solid entries from file. -Examples: $script -i test1.fna,test2.solid - Read Solid entries from files. -Examples: $script -i '*.solid' - Read Solid entries from files. 
-Examples: $script -i test.solid -n 10 - Read first 10 Solid entries from file. -Examples: $script -i test.solid -q 10 - Change quality score threshold to 10. - -Keys out: SEQ_NAME - Name of sequence. -Keys out: SEQ_CS - Sequence in color space. -Keys out: SEQ_QUAL - Sequence quality. -Keys out: SEQ - Sequence. -Keys out: SEQ_LEN - Length of sequence. -Keys out: SCORE_MEAN - Mean quality score. diff --git a/bp_usage/read_stockholm b/bp_usage/read_stockholm deleted file mode 100644 index fc03db2..0000000 --- a/bp_usage/read_stockholm +++ /dev/null @@ -1,22 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: September 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Read data in Stockholm format. - -Usage: $script [options] -i - -Options: [-i | --data_in=] - Read input data from file. -Options: [-n | --num=] - Limit number of records to read. -Options: [-I | --stream_in=] - Read input stream from file - Default=STDIN -Options: [-O | --stream_out=] - Write output stream to file - Default=STDOUT - -Examples: $script -i test.stockholm - -Keys out: SEQ_NAME - Sequence name. -Keys out: SEQ - Aligned sequence. -Keys out: ALIGN - Number indicating what alignment this sequence belongs to. diff --git a/bp_usage/remove_indels b/bp_usage/remove_indels deleted file mode 100644 index fe73def..0000000 --- a/bp_usage/remove_indels +++ /dev/null @@ -1,17 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Remove indels (-~.) from sequences in stream. - -Usage: ... | remove_indels [options] - -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... 
| remove_indels - Removes indels from all sequences in stream. - diff --git a/bp_usage/remove_keys b/bp_usage/remove_keys deleted file mode 100644 index f25ef36..0000000 --- a/bp_usage/remove_keys +++ /dev/null @@ -1,20 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Remove given keys from records in stream. - -Usage: ... | remove_keys [options] - -Options: [-k | --keys=] - Comma separated list of keys to remove. -Options: [-K | --save_keys=] - Remove all keys except these comma separated keys. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | remove_keys -k 'SEQ_NAME,SEQ' - Removes SEQ_NAME and SEQ from all records in stream. -Examples: ... | remove_keys -K 'SEQ_NAME,SEQ' - Removes all keys except SEQ_NAME and SEQ. - diff --git a/bp_usage/shuffle_seq b/bp_usage/shuffle_seq deleted file mode 100644 index cc7c035..0000000 --- a/bp_usage/shuffle_seq +++ /dev/null @@ -1,18 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: December 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Shuffle sequences in stream. - -Usage: ... | $script [options] - -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to file - Default=STDOUT - -Examples: ... | $script - Shuffles all sequences in stream. - -Keys in: SEQ - Sequence. 
diff --git a/bp_usage/split_bed b/bp_usage/split_bed deleted file mode 100644 index 672209a..0000000 --- a/bp_usage/split_bed +++ /dev/null @@ -1,28 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Split BED records into overlapping windows. - -Usage: ... | $script [options] - -Options: [-w | --window_size=] - Window size - Default=20 -Options: [-s | --step_size=] - Step size - Default=1 -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -w 12 - Split BED record into windows of size 12. -Examples: ... | $script -s 5 - Split BED record using windows overlapping every 5th nucleotide. - -Keys in: CHR - Chromosome. -Keys in: CHR_BEG - Chromosome begin position. -Keys in: CHR_END - Chromosome end position. - -Keys out: REC_TYPE - Record type (BED). -Keys out: CHR - Chromosome. -Keys out: CHR_BEG - Chromosome begin position. -Keys out: CHR_END - Chromosome end position. diff --git a/bp_usage/split_seq b/bp_usage/split_seq deleted file mode 100644 index daff988..0000000 --- a/bp_usage/split_seq +++ /dev/null @@ -1,22 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Split sequences in stream into overlapping oligos. - -Usage: ... | $script [options] - -Options: [-w | --word_size=] - Word size of oligos - Default=7 -Options: [-u | --uniq] - Only emit unique oligos. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -w 12 -u - Only emit unique oligos of size 12. 
- -Keys in: SEQ_NAME - Sequence name. -Keys in: SEQ - Sequence. - diff --git a/bp_usage/sum_vals b/bp_usage/sum_vals deleted file mode 100644 index 22e8047..0000000 --- a/bp_usage/sum_vals +++ /dev/null @@ -1,21 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Calculate the total sums for the values of given keys. - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-k | --keys=] - Comma separated list of keys. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -x -k SEQ_LEN,HIT_LEN -o result.txt - Sum values and save to 'result.txt'. - -Keys out: - _SUM - Sum of value of diff --git a/bp_usage/translate_seq b/bp_usage/translate_seq deleted file mode 100644 index 0939737..0000000 --- a/bp_usage/translate_seq +++ /dev/null @@ -1,22 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Translate DNA sequence into protein sequence. - -Usage: ... | $script [options] - -Options: [-f | --frames=] - Comma separated list of frames of translation: 1,2,3,-1,-2,-3 - Default=all -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -f 1 - Translate into first forward reading frame. -Examples: ... | $script -f "-1,-2,-3" - Translate into all reverse reading frames. - -Keys in: SEQ - Sequence. - -Keys out: FRAME - Frame of translation. 
diff --git a/bp_usage/upload_to_ucsc b/bp_usage/upload_to_ucsc deleted file mode 100644 index f8eee8b..0000000 --- a/bp_usage/upload_to_ucsc +++ /dev/null @@ -1,29 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: September 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Upload data to local UCSC database for viewing in Genome Browser. In order to display secondary structures for folded RNA you must include 'rnaSecStr' in the table name. - -Usage: ... | $script [options] <-d > <-t > - -Options: [-d | --database=] - Genome database to upload track to. -Options: [-t | --table=] - Table name of track - NB! prefix with initials. e.g.: mah_test. -Options: [-x | --no_stream] - Do not emit records. -Options: [-s | --short_label=] - Short label for track - Default= -Options: [-l | --long_label=] - Long label for track - Default=
-Options: [-g | --group=] - Track group name - Default=m.hansen -Options: [-p | --priority=] - Track display priority - Default=1 -Options: [-u | --use_score] - Use the score to grey shade the track. -Options: [-v | --visibility=] - Track visibility: hide|dense|squish|pack|full - Default=pack -Options: [-w | --wiggle] - Create wiggle track based on overlapping sequences. -Options: [-c | --color=] - Track color e.g. '147,73,42' - Default= -Options: [-C | --chunk_size=] - Chunks for loading - Default=10000000 -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -x -d hg18 -t initials_my_test - diff --git a/bp_usage/vmatch_seq b/bp_usage/vmatch_seq deleted file mode 100644 index bb9bae6..0000000 --- a/bp_usage/vmatch_seq +++ /dev/null @@ -1,28 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: October 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: vmatch sequences in stream against a specified genome. - -Usage: ... | $script [options] -g -Usage: ... | $script [options] -i - -Options: [-g | --genome= | --index_name=] - Custom index to vmatch. -Options: [-c | --count] - Replace score with hit count. -Options: [-m | --max_hits] - Skip hits with more than maximum hits (implies --count). -Options: [-h | --hamming_dist=] - Allow mismatches. -Options: [-e | --edit_dist=] - Allow mismatches and indels. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -g hg18 - Match sequences in stream against human genome. -Examples: ... | $script -g hg18 -h 1 - allowing for one mismatch. -Examples: ... | $script -g hg18 -h 5p - allowing for 5% mismatches. -Examples: ... 
| $script -g hg18 -e 2 - allowing for 2 mismatches or indels. -Examples: ... | $script -g hg18 -e 10p - allowing for 10% mismatches or indels. - diff --git a/bp_usage/write_2bit b/bp_usage/write_2bit deleted file mode 100644 index fec079d..0000000 --- a/bp_usage/write_2bit +++ /dev/null @@ -1,23 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: March 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Write sequences in 2bit format. - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-N | --no_mask] - Ignore soft masking. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -x -o test.2bit - Write entries to file 'test.2bit'. - -Keys in: SEQ_NAME - Sequence name. -Keys in: Q_ID - Used as sequence name if no SEQ_NAME. -Keys in: SEQ - Sequence. diff --git a/bp_usage/write_align b/bp_usage/write_align deleted file mode 100644 index d189686..0000000 --- a/bp_usage/write_align +++ /dev/null @@ -1,26 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Write aligned sequences in pretty alignment format. - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-w | --wrap=] - Wrap sequences to a given width. -Options: [-R | --no_ruler] - Suppress ruler for multiple alignments. -Options: [-C | --no_consensus] - Suppress consensus for multiple alignments. 
-Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -x -w 80 - Write entries wrapped to blocks of 80 to STDOUT. -Examples: ... | $script -x -o test.aln - Write entries to file 'test.aln'. - -Keys in: SEQ_NAME - Sequence name. -Keys in: SEQ - Sequence. -Keys in: ALIGN - Number specifying what alignment the sequence belongs to. diff --git a/bp_usage/write_blast b/bp_usage/write_blast deleted file mode 100644 index c9d937e..0000000 --- a/bp_usage/write_blast +++ /dev/null @@ -1,36 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Write BLAST records from stream in BLAST tabular format (-m8 and 9). - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-c | --comment] - Print comment line - Default=no -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT -Options: [-Z | --compress] - Compress output using gzip. - -Examples: ... | $script -x -c -o blast.tab - Write BLAST table with comment line to 'blast.tab'. -Examples: ... | $script -x -Z -o blast.tab.gz - Write zipped BLAST table to 'blast.tab.gz'. - -Keys in: Q_ID - Query ID. -Keys in: S_ID - Subject ID. -Keys in: IDENT - Identity. -Keys in: ALIGN_LEN - Alignment length. -Keys in: MISMATCHES - Mismatches. -Keys in: GAPS - Gaps. -Keys in: Q_BEG - Query begin. -Keys in: Q_END - Query end. -Keys in: S_BEG - Subject begin. -Keys in: S_END - Subject end. -Keys in: E_VAL - Expect value. -Keys in: BIT_SCORE - Bit score. -Keys in: STRAND - Strand. -Keys in: REC_TYPE - Record type must be BLAST. 
diff --git a/bp_usage/write_psl b/bp_usage/write_psl deleted file mode 100644 index 97df85b..0000000 --- a/bp_usage/write_psl +++ /dev/null @@ -1,45 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Write records from stream in PSL format. - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT -Options: [-Z | --compress] - Compress output using gzip. - -Examples: ... | $script -x -o table.psl - Output data to file 'table.psl'. -Examples: ... | $script -Z -o table.psl.gz - Output zipped data to file 'table.psl.gz'. - -Keys in: MATCHES - Number of non-repeat matches. -Keys in: MISMATCHES - Number of mismatches. -Keys in: REPMATCHES - Number of repeat matches. -Keys in: NCOUNT - Number of Ns. -Keys in: QNUMINSERT - Number of inserts in query. -Keys in: QBASEINSERT - Number of bases inserted in query. -Keys in: SNUMINSERT - Number of inserts in subject. -Keys in: SBASEINSERT - Number of bases inserted in subject. -Keys in: STRAND - Strand. -Keys in: Q_ID - Query ID. -Keys in: Q_LEN - Query length. -Keys in: Q_BEG - Query begin. -Keys in: Q_END - Query end. -Keys in: S_ID - Subject ID. -Keys in: S_LEN - Subject length. -Keys in: S_BEG - Subject begin. -Keys in: S_END - Subject end. -Keys in: BLOCKCOUNT - Block count. -Keys in: BLOCKSIZES - Block sizes. -Keys in: Q_BEGS - Query sequence blocks begins. -Keys in: S_BEGS - Subject sequence blocks begins. -Keys in: SCORE - Score calculated as in web BLAT results. -Keys in: REC_TYPE - Record type. 
- diff --git a/bp_usage/write_solid b/bp_usage/write_solid deleted file mode 100644 index 15a19c4..0000000 --- a/bp_usage/write_solid +++ /dev/null @@ -1,26 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: April 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Write di-base encoded Solid sequences. - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-w | --wrap=] - Wrap sequences to a given width. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT -Options: [-Z | --compress] - Compress output using gzip. - -Examples: ... | $script -x -w 80 - Write entries wrapped to blocks of 80 to STDOUT. -Examples: ... | $script -x -o test.solid - Write entries to file 'test.solid'. -Examples: ... | $script -x -Z -o test.solid.gz - Write zipped entries to file 'test.solid.gz'. - -Keys in: SEQ_NAME - Sequence name. -Keys in: Q_ID - Used as sequence name if no SEQ_NAME. -Keys in: SEQ - Sequence.