]> git.donarmstrong.com Git - biopieces.git/commitdiff
more wikis
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 1 Jul 2008 07:36:05 +0000 (07:36 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 1 Jul 2008 07:36:05 +0000 (07:36 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@95 74ccb610-7750-0410-82ae-013aeee3265d

bp_usage/fold_seq [deleted file]
bp_usage/fold_seq.wiki [new file with mode: 0644]
bp_usage/transliterate_seq [deleted file]
bp_usage/transliterate_seq.wiki [new file with mode: 0644]
bp_usage/transliterate_vals [deleted file]
bp_usage/transliterate_vals.wiki [new file with mode: 0644]
bp_usage/write_bed [deleted file]
bp_usage/write_bed.wiki [new file with mode: 0644]
code_perl/Maasha/Gwiki.pm

diff --git a/bp_usage/fold_seq b/bp_usage/fold_seq
deleted file mode 100644 (file)
index 08e2b10..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Author:         Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact:        mail@maasha.dk
-
-Date:           February 2008
-
-License:        GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description:    Folds sequences in stream.
-
-Usage:          ... | $script [options]
-
-Options:  [-I <file> | --stream_in=<file>]   -  Read input from stream file   -  Default=STDIN
-Options:  [-O <file> | --stream_out=<file>]  -  Write output to stream file   -  Default=STDOUT
-
-Examples:  ... | $script  -  Folds all sequences in stream.
diff --git a/bp_usage/fold_seq.wiki b/bp_usage/fold_seq.wiki
new file mode 100644 (file)
index 0000000..4f7dd4b
--- /dev/null
@@ -0,0 +1,102 @@
+=Biopiece: fold_seq=
+
+==Synopsis==
+
+Fold sequences in stream.
+
+==Description==
+
+[fold_seq] folds nucleotide sequences into secondary structures, which is useful to illustrate
+e.g. miRNA hairpins. The resulting folding information may be uploaded to a custom UCSC Genome
+Browser track (see [upload_to_ucsc]).
+
+[fold_seq] currently uses RNAfold from the Vienna package as folding engine. RNAfold must be
+installed for [fold_seq] to work. Read more about RNAfold here:
+
+http://www.tbi.univie.ac.at/RNA/
+
+==Usage==
+
+{{{
+... | fold_seq [options]
+}}}
+
+==Options==
+
+{{{
+[-I <file> | --stream_in=<file>]   -  Read input from stream file   -  Default=STDIN
+[-O <file> | --stream_out=<file>]  -  Write output to stream file   -  Default=STDOUT
+}}}
+
+==Examples==
+
+Consider the following RNA sequence entry in FASTA format from the file `test.fna`:
+
+{{{
+>MI0000116
+UUCAGCCUUUGAGAGUUCCAUGCUUCCUUGCAUUCAAUAGUUAUAUUCAAGCAUAUGGAAUGUAAAGAAGUAUGGAGCGAAAUCUGGCGAG
+}}}
+
+We can read that file with [read_fasta] and then fold the sequence with [fold_seq]:
+
+{{{
+read_fasta -i test.fna | fold_seq
+
+SIZE: 91
+FREE_ENERGY: -36.20
+SCORE: 36
+SEQ: UUCAGCCUUUGAGAGUUCCAUGCUUCCUUGCAUUCAAUAGUUAUAUUCAAGCAUAUGGAAUGUAAAGAAGUAUGGAGCGAAAUCUGGCGAG
+SEQ_NAME: MI0000116
+SEQ_LEN: 91
+SEC_STRUCT: ....(((...((..((((((((((((.((((((((.((((((.......))).))).)))))))).))))))))))))....)).)))...
+CONF: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 <truncated>
+---
+}}}
+
+The resulting record contains the secondary structure in Stockholm format as the value of SEC_STRUCT.
+Also, the FREE_ENERGY key is useful for selecting good structures. [grab] is your friend. The CONF key
+holds confidence information for the folding at any given position (not used, so set to 1s).
+
+Now, if you have a fully configured UCSC Genome Browser system installed, it is possible to upload secondary
+structure information. To do this, you must have a BED entry and the folded sequence. The reasonable way to
+do this is to have a BED file with the coordinates of the sequence you wish to fold and display, and then apply the
+magic:
+
+{{{
+read_bed -i <BED file> | get_genome_seq -g <genome> | fold_seq | upload_to_ucsc -d <genome> -t <my_table_rnaSecStr> -x
+}}}
+
+Notice that the table name _must_ contain the string 'rnaSecStr' to display the folding information in the
+Genome Browser.
+
+==See also==
+
+[read_fasta]
+
+[upload_to_ucsc]
+
+[grab]
+
+[read_bed]
+
+[get_genome_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+February 2008
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[fold_seq] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
diff --git a/bp_usage/transliterate_seq b/bp_usage/transliterate_seq
deleted file mode 100644 (file)
index 9a3af94..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Author:         Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact:        mail@maasha.dk
-
-Date:           August 2007
-
-License:        GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description:    Transliterate chars from sequences in stream.
-
-Usage:          ... | $script [options]
-
-Options:   [-s <chars> | --search=<chars>]     -  String of chars to locate and replace
-Options:   [-r <chars> | --replace=<chars>]    -  String of chars for replacing
-Options:   [-d <chars> | --delete=<chars>]     -  String of chars to delete
-Options:   [-I <file>  | --stream_in=<file>]   -  Read input from stream file  -  Default=STDIN
-Options:   [-O <file>  | --stream_out=<file>]  -  Write output to stream file  -  Default=STDOUT
-
-Examples:   ... | $script -s Uu -r Tt  -  Replacing u with t as in converting RNA to DNA.
-Examples:   ... | $script -d '.~-'     -  Removing indels.
-
-Keys in: SEQ  -  Sequence.
diff --git a/bp_usage/transliterate_seq.wiki b/bp_usage/transliterate_seq.wiki
new file mode 100644 (file)
index 0000000..d8e000f
--- /dev/null
@@ -0,0 +1,79 @@
+=Biopiece: transliterate_seq=
+
+==Synopsis==
+
+Transliterate chars from sequences in stream.
+
+==Description==
+
+Transliteration is ultra fast search and replace (or search and delete) of characters in sequence
+and is useful for things such as lowercasing sequence, converting sequence from RNA to DNA or converting
+soft-masked sequence to hard-masked.
+
+==Usage==
+
+{{{
+... | transliterate_seq [options]
+}}}
+
+==Options==
+
+{{{
+[-s <chars> | --search=<chars>]     -  String of chars to locate and replace
+[-r <chars> | --replace=<chars>]    -  String of chars for replacing
+[-d <chars> | --delete=<chars>]     -  String of chars to delete
+[-I <file>  | --stream_in=<file>]   -  Read input from stream file  -  Default=STDIN
+[-O <file>  | --stream_out=<file>]  -  Write output to stream file  -  Default=STDOUT
+}}}
+
+==Examples==
+
+To lowercase sequence:
+
+{{{
+transliterate_seq -s ATCGUN -r atcgun
+}}}
+
+To convert RNA to DNA:
+
+{{{
+transliterate_seq -s Uu -r Tt
+}}}
+
+To convert soft-masked sequence to hard-masked:
+
+{{{
+transliterate_seq -s atcgu -r NNNNN
+}}}
+
+To remove all N's from a sequence:
+
+{{{
+transliterate_seq -d nN
+}}}
+
+==See also==
+
+[transliterate_vals]
+
+[uppercase_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+August 2007
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[transliterate_seq] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
diff --git a/bp_usage/transliterate_vals b/bp_usage/transliterate_vals
deleted file mode 100644 (file)
index 5101a07..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-Author:         Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact:        mail@maasha.dk
-
-Date:           April 2008
-
-License:        GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description:    Transliterate chars from values in stream.
-
-Usage:          ... | $script [options]
-
-Options:   [-k <string> | --keys=<string>]      -  List of values to transliterate
-Options:   [-s <chars>  | --search=<chars>]     -  String of chars to locate and replace
-Options:   [-r <chars>  | --replace=<chars>]    -  String of chars for replacing
-Options:   [-d <chars>  | --delete=<chars>]     -  String of chars to delete
-Options:   [-I <file>   | --stream_in=<file>]   -  Read input from stream file  -  Default=STDIN
-Options:   [-O <file>   | --stream_out=<file>]  -  Write output to stream file  -  Default=STDOUT
-
-Examples:   ... | $script -k SEQ -s Uu -r Tt   -  Replacing u with t in SEQ (i.e converting RNA to DNA).
-Examples:   ... | $script -k PATTERN -d '.~-'  -  Removing indels from PATTERN.
diff --git a/bp_usage/transliterate_vals.wiki b/bp_usage/transliterate_vals.wiki
new file mode 100644 (file)
index 0000000..c0b5ebe
--- /dev/null
@@ -0,0 +1,65 @@
+=Biopiece: transliterate_vals=
+
+==Synopsis==
+
+Transliterate chars from values in stream.
+
+==Description==
+
+Transliteration is ultra fast search and replace (or search and delete) of characters in values
+and is useful for things such as converting sequence from RNA to DNA or removing indels from patterns.
+
+==Usage==
+
+{{{
+... | transliterate_vals [options]
+}}}
+
+==Options==
+
+{{{
+[-k <string> | --keys=<string>]      -  List of values to transliterate
+[-s <chars>  | --search=<chars>]     -  String of chars to locate and replace
+[-r <chars>  | --replace=<chars>]    -  String of chars for replacing
+[-d <chars>  | --delete=<chars>]     -  String of chars to delete
+[-I <file>   | --stream_in=<file>]   -  Read input from stream file  -  Default=STDIN
+[-O <file>   | --stream_out=<file>]  -  Write output to stream file  -  Default=STDOUT
+}}}
+
+==Examples==
+
+To convert RNA sequence to DNA:
+
+{{{
+transliterate_vals -k SEQ -s Uu -r Tt
+}}}
+
+To remove indels from patterns:
+
+{{{
+transliterate_vals -k PATTERN -d '._-~'
+}}}
+
+==See also==
+
+[transliterate_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+April 2008
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[transliterate_vals] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
diff --git a/bp_usage/write_bed b/bp_usage/write_bed
deleted file mode 100644 (file)
index a71f9b7..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-Author:         Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact:        mail@maasha.dk
-
-Date:           August 2007
-
-License:        GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description:    Write records from stream as BED lines.
-
-Usage:          ... | $script [options]
-
-Options:   [-x        | --no_stream]          -  Do not emit records.
-Options:   [-o <file> | --data_out=<file>]    -  Write result to file.
-Options:   [-I <file> | --stream_in=<file>]   -  Read input from stream file  -  Default=STDIN
-Options:   [-O <file> | --stream_out=<file>]  -  Write output to stream file  -  Default=STDOUT
-Options:   [-Z        | --compress]           -  Compress output using gzip.
-
-Examples:   ... | $script -x -o table.bed     -  Output data to file 'table.bed'.
-Examples:   ... | $script -Z -o table.bed.gz  -  Output zipped data to file 'table.bed.gz'.
-
-Keys in: REC_TYPE  -  Either BED,PSL,PATSCAN,BLAST,VMATCH, otherwise generic.
-Keys in: CHR       -  Chromosome.
-Keys in: CHR_BEG   -  Chromosome begin position.
-Keys in: CHR_END   -  Chromosome end position.
-Keys in: Q_ID      -  Feature name.
-Keys in: SCORE     -  Score.
-Keys in: STRAND    -  Strand.
diff --git a/bp_usage/write_bed.wiki b/bp_usage/write_bed.wiki
new file mode 100644 (file)
index 0000000..e375fde
--- /dev/null
@@ -0,0 +1,98 @@
+=Biopiece: write_bed=
+
+==Synopsis==
+
+Write BED entries from stream.
+
+==Description==
+
+[write_bed] outputs BED (Browser Extensible Data) records from the stream.
+The BED format consists of up to 12 columns, where the first three are mandatory.
+
+  # CHR - the name of the chromosome.
+  # CHR_BEG  - the chromosome begin position.
+  # CHR_END  - the chromosome end position.
+  # Q_ID - the name of the feature.
+  # SCORE - a score between 0 and 1000.
+  # STRAND - the orientation of the feature.
+  # THICK_BEG - begin position of 'thick' drawing used for UTRs.
+  # THICK_END - end position of 'thick' drawing used for UTRs.
+  # ITEMRGB - RGB color code for feature.
+  # BLOCKCOUNT - number of exon blocks.
+  # BLOCKSIZES - list of block sizes.
+  # Q_BEGS - list of block begins.
+
+Records can be written to BED format if the REC_TYPE is either:
+
+  * BED - from [read_bed]
+  * PSL - from [read_psl] or [blat_seq]
+  * PATSCAN - from [patscan_seq]
+  * BLAST - from [read_blast_tab] or [blast_seq]
+  * VMATCH - from [vmatch_seq]
+
+... and if the S_ID value begins with 'chr'.
+
+Also, generic records may be output as BED format if the record contains
+CHR, CHR_BEG, and CHR_END keys and if the CHR value begins with 'chr'.
+
+For more about the BED format:
+
+http://genome.ucsc.edu/FAQ/FAQformat#format1
+
+==Usage==
+
+{{{
+... | write_bed [options]
+}}}
+
+==Options==
+
+{{{
+[-x        | --no_stream]          -  Do not emit records.
+[-o <file> | --data_out=<file>]    -  Write result to file.
+[-I <file> | --stream_in=<file>]   -  Read input from stream file  -  Default=STDIN
+[-O <file> | --stream_out=<file>]  -  Write output to stream file  -  Default=STDOUT
+[-Z        | --compress]           -  Compress output using gzip.
+}}}
+
+==Examples==
+
+{{{
+... | write_bed -xo test.bed
+}}}
+
+==See also==
+
+[read_bed]
+
+[read_psl]
+
+[blat_seq]
+
+[patscan_seq]
+
+[read_blast_tab]
+
+[blast_seq]
+
+[vmatch_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+August 2007
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[write_bed] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
index d97a8d4e5543069aea65e0030299b30f9651fd60..5f471195e6aeb15e4a0dceaa1eefda32836c0dee 100644 (file)
@@ -76,16 +76,12 @@ sub gwiki2ascii
             for ( $i = 0; $i < @{ $block }; $i++ ) {
                 push @lines, "   * $block->[ $i ]->{ 'TEXT' }";
             }
-
-            push @lines, "";
         }
         elsif ( $block->[ 0 ]->{ 'FORMAT' } eq "enumerate" )
         {
             for ( $i = 0; $i < @{ $block }; $i++ ) {
                 push @lines, "   " . ( $i + 1 ) . ". $block->[ $i ]->{ 'TEXT' }";
             }
-
-            push @lines, "";
         }
         elsif ( $block->[ 0 ]->{ 'FORMAT' } eq "paragraph" )
         {
@@ -97,9 +93,9 @@ sub gwiki2ascii
                 $line =~ s/\s*$//;
                 $line =~ s/\s+/ /g;
                 $line =~ tr/`//d;
-                $line =~ s/\[([^\]]+?)\]/$1/g;
-                $line =~ s/\*([^\*]+?)\*/&text_bold($1)/ge;
-                $line =~ s/_([^_]+?)_/&text_underline($1)/ge;
+                $line =~ s/\[([^\]]+?)\]/&text_underline($1)/ge;
+                $line =~ s/\*([^\*]+?)\*/&text_bold($1)/ge       if $line =~ /(^| )\*[^\*]+?\*( |$)/;
+                $line =~ s/_([^_]+?)_/&text_underline($1)/ge     if $line =~ /(^| )_[^_]+?_( |$)/;
 
                 push @lines, $_ foreach &Maasha::Common::wrap_line( $line, 80 );
             }
@@ -270,7 +266,7 @@ sub text_bold
     my ( $txt,
        ) = @_;
 
-    return colored ( $txt, "magenta" );
+    return colored ( $txt, "bold" );
 }