From 0c6215d950e133b0f415d38df85caaa6c9cd2a7c Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 1 Jul 2008 07:36:05 +0000 Subject: [PATCH] more wikis git-svn-id: http://biopieces.googlecode.com/svn/trunk@95 74ccb610-7750-0410-82ae-013aeee3265d --- bp_usage/fold_seq | 16 ----- bp_usage/fold_seq.wiki | 102 +++++++++++++++++++++++++++++++ bp_usage/transliterate_seq | 22 ------- bp_usage/transliterate_seq.wiki | 79 ++++++++++++++++++++++++ bp_usage/transliterate_vals | 21 ------- bp_usage/transliterate_vals.wiki | 65 ++++++++++++++++++++ bp_usage/write_bed | 28 --------- bp_usage/write_bed.wiki | 98 +++++++++++++++++++++++++++++ code_perl/Maasha/Gwiki.pm | 12 ++-- 9 files changed, 348 insertions(+), 95 deletions(-) delete mode 100644 bp_usage/fold_seq create mode 100644 bp_usage/fold_seq.wiki delete mode 100644 bp_usage/transliterate_seq create mode 100644 bp_usage/transliterate_seq.wiki delete mode 100644 bp_usage/transliterate_vals create mode 100644 bp_usage/transliterate_vals.wiki delete mode 100644 bp_usage/write_bed create mode 100644 bp_usage/write_bed.wiki diff --git a/bp_usage/fold_seq b/bp_usage/fold_seq deleted file mode 100644 index 08e2b10..0000000 --- a/bp_usage/fold_seq +++ /dev/null @@ -1,16 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: February 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Folds sequences in stream. - -Usage: ... | $script [options] - -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script - Folds all sequences in stream. diff --git a/bp_usage/fold_seq.wiki b/bp_usage/fold_seq.wiki new file mode 100644 index 0000000..4f7dd4b --- /dev/null +++ b/bp_usage/fold_seq.wiki @@ -0,0 +1,102 @@ +=Biopiece: fold_seq= + +==Synopsis== + +Fold sequences in stream. 
+ +==Description== + +[fold_seq] folds nucleotide sequence into secondary structure, which is useful to illustrate +e.g. miRNA hairpins. The resulting folding information may be uploaded to a custom UCSC Genome +Browser track (see [upload_to_ucsc]). + +[fold_seq] currently uses RNAfold from the Vienna package as folding engine. RNAfold must be +installed for [fold_seq] to work. Read more about RNAfold here: + +http://www.tbi.univie.ac.at/RNA/ + +==Usage== + +{{{ +... | fold_seq [options] +}}} + +==Options== + +{{{ +[-I | --stream_in=] - Read input from stream file - Default=STDIN +[-O | --stream_out=] - Write output to stream file - Default=STDOUT +}}} + +==Examples== + +Consider the following RNA sequence entry in FASTA format from the file `test.fna`: + +{{{ +>MI0000116 +UUCAGCCUUUGAGAGUUCCAUGCUUCCUUGCAUUCAAUAGUUAUAUUCAAGCAUAUGGAAUGUAAAGAAGUAUGGAGCGAAAUCUGGCGAG +}}} + +We can read that file with [read_fasta] and then fold the sequence with [fold_seq]: + +{{{ +read_fasta -i test.fna | fold_seq + +SIZE: 91 +FREE_ENERGY: -36.20 +SCORE: 36 +SEQ: UUCAGCCUUUGAGAGUUCCAUGCUUCCUUGCAUUCAAUAGUUAUAUUCAAGCAUAUGGAAUGUAAAGAAGUAUGGAGCGAAAUCUGGCGAG +SEQ_NAME: MI0000116 +SEQ_LEN: 91 +SEC_STRUCT: ....(((...((..((((((((((((.((((((((.((((((.......))).))).)))))))).))))))))))))....)).)))... +CONF: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +--- +}}} + +The resulting record contains the secondary structure in Stockholm format as the value to SEC_STRUCT. +Also, the FREE_ENERGY key is useful for selecting good structures. [grab] is your friend. The CONF key +holds confidence information for the folding at any given position (not used, so set to 1s). + +Now, if you have a fully configured UCSC Genome Browser system installed, it is possible to upload secondary +structure information. To do this, you must have a BED entry, and the folded sequence. 
The reasonable way to +do this is to have a BED file with the coordinates of the sequence you wish to fold and display and then the +magic: + +{{{ +read_bed -i | get_genome_seq -g | fold_seq | upload_to_ucsc -d -t -x +}}} + +Notice that the table name _must_ contain the string 'rnaSecStr' to display the folding information in the +Genome Browser. + +==See also== + +[read_fasta] + +[upload_to_ucsc] + +[grab] + +[read_bed] + +[get_genome_seq] + +==Author== + +Martin Asser Hansen - Copyright (C) - All rights reserved. + +mail@maasha.dk + +August 2007 + +==License== + +GNU General Public License version 2 + +http://www.gnu.org/copyleft/gpl.html + +==Help== + +[fold_seq] is part of the Biopieces framework. + +http://code.google.com/p/biopieces/ diff --git a/bp_usage/transliterate_seq b/bp_usage/transliterate_seq deleted file mode 100644 index 9a3af94..0000000 --- a/bp_usage/transliterate_seq +++ /dev/null @@ -1,22 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Transliterate chars from sequences in stream. - -Usage: ... | $script [options] - -Options: [-s | --search=] - String of chars to locate and replace -Options: [-r | --replace=] - String of chars for replacing -Options: [-d | --delete=] - String of chars to delete -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -s Uu -r Tt - Replacing u with t as in converting RNA to DNA. -Examples: ... | $script -d '.~-' - Removing indels. - -Keys in: SEQ - Sequence. 
diff --git a/bp_usage/transliterate_seq.wiki b/bp_usage/transliterate_seq.wiki new file mode 100644 index 0000000..d8e000f --- /dev/null +++ b/bp_usage/transliterate_seq.wiki @@ -0,0 +1,79 @@ +=Biopiece: transliterate_seq= + +==Synopsis== + +Transliterate chars from sequences in stream. + +==Description== + +Transliteration is ultra fast search and replace (or search and delete) of characters in sequence +and is useful for things such as lowercasing sequence, converting sequence from RNA to DNA or converting +soft-masked sequence to hard-masked. + +==Usage== + +{{{ +... | transliterate_seq [options] +}}} + +==Options== + +{{{ +[-s | --search=] - String of chars to locate and replace +[-r | --replace=] - String of chars for replacing +[-d | --delete=] - String of chars to delete +[-I | --stream_in=] - Read input from stream file - Default=STDIN +[-O | --stream_out=] - Write output to stream file - Default=STDOUT +}}} + +==Examples== + +To lowercase sequence: + +{{{ +transliterate_seq -s ATCGUN -r atcgun +}}} + +To convert RNA to DNA: + +{{{ +transliterate_seq -s Uu -r Tt +}}} + +To convert soft-masked sequence to hard-masked: + +{{{ +transliterate_seq -s atcgu -r NNNNN +}}} + +To remove all N's from a sequence: + +{{{ +transliterate_seq -s nN -d +}}} + +==See also== + +[transliterate_vals] + +[uppercase_seq] + +==Author== + +Martin Asser Hansen - Copyright (C) - All rights reserved. + +mail@maasha.dk + +August 2007 + +==License== + +GNU General Public License version 2 + +http://www.gnu.org/copyleft/gpl.html + +==Help== + +[transliterate_seq] is part of the Biopieces framework. 
+ +http://code.google.com/p/biopieces/ diff --git a/bp_usage/transliterate_vals b/bp_usage/transliterate_vals deleted file mode 100644 index 5101a07..0000000 --- a/bp_usage/transliterate_vals +++ /dev/null @@ -1,21 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: April 2008 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Transliterate chars from values in stream. - -Usage: ... | $script [options] - -Options: [-k | --keys=] - List of values to transliterate -Options: [-s | --search=] - String of chars to locate and replace -Options: [-r | --replace=] - String of chars for replacing -Options: [-d | --delete=] - String of chars to delete -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT - -Examples: ... | $script -k SEQ -s Uu -r Tt - Replacing u with t in SEQ (i.e converting RNA to DNA). -Examples: ... | $script -k PATTERN -d '.~-' - Removing indels from PATTERN. diff --git a/bp_usage/transliterate_vals.wiki b/bp_usage/transliterate_vals.wiki new file mode 100644 index 0000000..c0b5ebe --- /dev/null +++ b/bp_usage/transliterate_vals.wiki @@ -0,0 +1,65 @@ +=Biopiece: transliterate_vals= + +==Synopsis== + +Transliterate chars from values in stream. + +==Description== + +Transliteration is ultra fast search and replace (or search and delete) of characters in values +and is useful for things such as converting sequence from RNA to DNA or removing indels from patterns. + +==Usage== + +{{{ +... 
| transliterate_vals [options] +}}} + +==Options== + +{{{ +[-k | --keys=] - List of values to transliterate +[-s | --search=] - String of chars to locate and replace +[-r | --replace=] - String of chars for replacing +[-d | --delete=] - String of chars to delete +[-I | --stream_in=] - Read input from stream file - Default=STDIN +[-O | --stream_out=] - Write output to stream file - Default=STDOUT +}}} + +==Examples== + +To convert RNA sequence to DNA: + +{{{ +transliterate_vals -k SEQ -s Uu -r Tt +}}} + +To remove indels from patterns: + +{{{ +transliterate_vals -k PATTERN -s '._-~' -d +}}} + +==See also== + +[transliterate_seq] + +==Author== + +Martin Asser Hansen - Copyright (C) - All rights reserved. + +mail@maasha.dk + +August 2007 + +==License== + +GNU General Public License version 2 + +http://www.gnu.org/copyleft/gpl.html + +==Help== + +[transliterate_vals] is part of the Biopieces framework. + +http://code.google.com/p/biopieces/ diff --git a/bp_usage/write_bed b/bp_usage/write_bed deleted file mode 100644 index a71f9b7..0000000 --- a/bp_usage/write_bed +++ /dev/null @@ -1,28 +0,0 @@ -Author: Martin Asser Hansen - Copyright (C) - All rights reserved - -Contact: mail@maasha.dk - -Date: August 2007 - -License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html) - -Description: Write records from stream as BED lines. - -Usage: ... | $script [options] - -Options: [-x | --no_stream] - Do not emit records. -Options: [-o | --data_out=] - Write result to file. -Options: [-I | --stream_in=] - Read input from stream file - Default=STDIN -Options: [-O | --stream_out=] - Write output to stream file - Default=STDOUT -Options: [-Z | --compress] - Compress output using gzip. - -Examples: ... | $script -x -o table.bed - Output data to file 'table.bed'. -Examples: ... | $script -Z -o table.bed.gz - Output zipped data to file 'table.bed.gz'. - -Keys in: REC_TYPE - Either BED,PSL,PATSCAN,BLAST,VMATCH, otherwise generic. -Keys in: CHR - Chromosome. 
-Keys in: CHR_BEG - Chromosome begin position. -Keys in: CHR_END - Chromosome end position. -Keys in: Q_ID - Feature name. -Keys in: SCORE - Score. -Keys in: STRAND - Strand. diff --git a/bp_usage/write_bed.wiki b/bp_usage/write_bed.wiki new file mode 100644 index 0000000..e375fde --- /dev/null +++ b/bp_usage/write_bed.wiki @@ -0,0 +1,98 @@ +=Biopiece: write_bed= + +==Synopsis== + +Write BED entries from stream. + +==Description== + +[write_bed] outputs BED (Browser Extensible Data) records from the stream. +The BED format consists of up to 12 columns, where the first three are mandatory. + + # CHR - the name of the chromosome. + # CHR_BEG - the chromosome begin position. + # CHR_END - the chromosome end position. + # Q_ID - the name of the feature. + # SCORE - a score between 0 and 1000. + # STRAND - the orientation of the feature. + # THICK_BEG - begin position of 'thick' drawing used for UTRs. + # THICK_END - end position of 'thick' drawing used for UTRs. + # ITEMRGB - RGB color code for feature. + # BLOCKCOUNT - number of exon blocks. + # BLOCKSIZES - list of block sizes. + # Q_BEGS - list of block begins. + +Records can be written to BED format if the REC_TYPE is either: + + * BED - from [read_bed] + * PSL - from [read_psl] or [blat_seq] + * PATSCAN - from [patscan_seq] + * BLAST - from [read_blast_tab] or [blast_seq] + * VMATCH - from [vmatch_seq] + +... and if S_ID key begins with 'chr'. + +Also, generic records may be output as BED format if the record contains +CHR, CHR_BEG, and CHR_END keys and if the CHR value begins with 'chr'. + +For more about the BED format: + +http://genome.ucsc.edu/FAQ/FAQformat#format1 + +==Usage== + +{{{ +... | write_bed [options] +}}} + +==Options== + +{{{ +[-x | --no_stream] - Do not emit records. +[-o | --data_out=] - Write result to file. +[-I | --stream_in=] - Read input from stream file - Default=STDIN +[-O | --stream_out=] - Write output to stream file - Default=STDOUT +[-Z | --compress] - Compress output using gzip. 
+}}} + +==Examples== + +{{{ +... | write_bed -xo test.bed +}}} + +==See also== + +[read_bed] + +[read_psl] + +[blat_seq] + +[patscan_seq] + +[read_blast_tab] + +[blast_seq] + +[vmatch_seq] + +==Author== + +Martin Asser Hansen - Copyright (C) - All rights reserved. + +mail@maasha.dk + +August 2007 + +==License== + +GNU General Public License version 2 + +http://www.gnu.org/copyleft/gpl.html + +==Help== + +[write_bed] is part of the Biopieces framework. + +http://code.google.com/p/biopieces/ diff --git a/code_perl/Maasha/Gwiki.pm b/code_perl/Maasha/Gwiki.pm index d97a8d4..5f47119 100644 --- a/code_perl/Maasha/Gwiki.pm +++ b/code_perl/Maasha/Gwiki.pm @@ -76,16 +76,12 @@ sub gwiki2ascii for ( $i = 0; $i < @{ $block }; $i++ ) { push @lines, " * $block->[ $i ]->{ 'TEXT' }"; } - - push @lines, ""; } elsif ( $block->[ 0 ]->{ 'FORMAT' } eq "enumerate" ) { for ( $i = 0; $i < @{ $block }; $i++ ) { push @lines, " " . ( $i + 1 ) . ". $block->[ $i ]->{ 'TEXT' }"; } - - push @lines, ""; } elsif ( $block->[ 0 ]->{ 'FORMAT' } eq "paragraph" ) { @@ -97,9 +93,9 @@ sub gwiki2ascii $line =~ s/\s*$//; $line =~ s/\s+/ /g; $line =~ tr/`//d; - $line =~ s/\[([^\]]+?)\]/$1/g; - $line =~ s/\*([^\*]+?)\*/&text_bold($1)/ge; - $line =~ s/_([^_]+?)_/&text_underline($1)/ge; + $line =~ s/\[([^\]]+?)\]/&text_underline($1)/ge; + $line =~ s/\*([^\*]+?)\*/&text_bold($1)/ge if $line =~ /(^| )\*[^\*]+?\*( |$)/; + $line =~ s/_([^_]+?)_/&text_underline($1)/ge if $line =~ /(^| )_[^_]+?_( |$)/; push @lines, $_ foreach &Maasha::Common::wrap_line( $line, 80 ); } @@ -270,7 +266,7 @@ sub text_bold my ( $txt, ) = @_; - return colored ( $txt, "magenta" ); + return colored ( $txt, "bold" ); } -- 2.39.5