+++ /dev/null
-Author: Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact: mail@maasha.dk
-
-Date: February 2008
-
-License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description: Folds sequences in stream.
-
-Usage: ... | $script [options]
-
-Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
-Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
-
-Examples: ... | $script - Folds all sequences in stream.
--- /dev/null
+=Biopiece: fold_seq=
+
+==Synopsis==
+
+Fold sequences in stream.
+
+==Description==
+
+[fold_seq] folds nucleotide sequences into secondary structures, which is useful to illustrate
+e.g. miRNA hairpins. The resulting folding information may be uploaded to a custom UCSC Genome
+Browser track (see [upload_to_ucsc]).
+
+[fold_seq] currently uses RNAfold from the Vienna package as folding engine. RNAfold must be
+installed for [fold_seq] to work. Read more about RNAfold here:
+
+http://www.tbi.univie.ac.at/RNA/
+
+==Usage==
+
+{{{
+... | fold_seq [options]
+}}}
+
+==Options==
+
+{{{
+[-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+[-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+}}}
+
+==Examples==
+
+Consider the following RNA sequence entry in FASTA format from the file `test.fna`:
+
+{{{
+>MI0000116
+UUCAGCCUUUGAGAGUUCCAUGCUUCCUUGCAUUCAAUAGUUAUAUUCAAGCAUAUGGAAUGUAAAGAAGUAUGGAGCGAAAUCUGGCGAG
+}}}
+
+We can read that file with [read_fasta] and then fold the sequence with [fold_seq]:
+
+{{{
+read_fasta -i test.fna | fold_seq
+
+SIZE: 91
+FREE_ENERGY: -36.20
+SCORE: 36
+SEQ: UUCAGCCUUUGAGAGUUCCAUGCUUCCUUGCAUUCAAUAGUUAUAUUCAAGCAUAUGGAAUGUAAAGAAGUAUGGAGCGAAAUCUGGCGAG
+SEQ_NAME: MI0000116
+SEQ_LEN: 91
+SEC_STRUCT: ....(((...((..((((((((((((.((((((((.((((((.......))).))).)))))))).))))))))))))....)).)))...
+CONF: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 <truncated>
+---
+}}}
+
+The resulting record contains the secondary structure in Stockholm format as the value of SEC_STRUCT.
+Also, the FREE_ENERGY key is useful for selecting good structures. [grab] is your friend. The CONF key
+holds confidence information for the folding at any given position (not used, so set to 1s).
+
+Now, if you have a fully configured UCSC Genome Browser system installed, it is possible to upload secondary
+structure information. To do this, you must have a BED entry and the folded sequence. The reasonable way to
+do this is to have a BED file with the coordinates of the sequence you wish to fold and display, and then the
+magic:
+
+{{{
+read_bed -i <BED file> | get_genome_seq -g <genome> | fold_seq | upload_to_ucsc -d <genome> -t <my_table_rnaSecStr> -x
+}}}
+
+Notice that the table name _must_ contain the string 'rnaSecStr' to display the folding information in the
+Genome Browser.
+
+==See also==
+
+[read_fasta]
+
+[upload_to_ucsc]
+
+[grab]
+
+[read_bed]
+
+[get_genome_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+February 2008
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[fold_seq] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
+++ /dev/null
-Author: Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact: mail@maasha.dk
-
-Date: August 2007
-
-License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description: Transliterate chars from sequences in stream.
-
-Usage: ... | $script [options]
-
-Options: [-s <chars> | --search=<chars>] - String of chars to locate and replace
-Options: [-r <chars> | --replace=<chars>] - String of chars for replacing
-Options: [-d <chars> | --delete=<chars>] - String of chars to delete
-Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
-Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
-
-Examples: ... | $script -s Uu -r Tt - Replacing u with t as in converting RNA to DNA.
-Examples: ... | $script -d '.~-' - Removing indels.
-
-Keys in: SEQ - Sequence.
--- /dev/null
+=Biopiece: transliterate_seq=
+
+==Synopsis==
+
+Transliterate chars from sequences in stream.
+
+==Description==
+
+Transliteration is ultra fast search and replace (or search and delete) of characters in sequences
+and is useful for things such as lowercasing sequences, converting sequences from RNA to DNA or converting
+soft-masked sequences to hard-masked.
+
+==Usage==
+
+{{{
+... | transliterate_seq [options]
+}}}
+
+==Options==
+
+{{{
+[-s <chars> | --search=<chars>] - String of chars to locate and replace
+[-r <chars> | --replace=<chars>] - String of chars for replacing
+[-d <chars> | --delete=<chars>] - String of chars to delete
+[-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+[-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+}}}
+
+==Examples==
+
+To lowercase sequence:
+
+{{{
+transliterate_seq -s ATCGUN -r atcgun
+}}}
+
+To convert RNA to DNA:
+
+{{{
+transliterate_seq -s Uu -r Tt
+}}}
+
+To convert soft-masked sequence to hard-masked:
+
+{{{
+transliterate_seq -s atcgu -r NNNNN
+}}}
+
+To remove all N's from a sequence:
+
+{{{
+transliterate_seq -d nN
+}}}
+
+==See also==
+
+[transliterate_vals]
+
+[uppercase_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+August 2007
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[transliterate_seq] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
+++ /dev/null
-Author: Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact: mail@maasha.dk
-
-Date: April 2008
-
-License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description: Transliterate chars from values in stream.
-
-Usage: ... | $script [options]
-
-Options: [-k <string> | --keys=<string>] - List of values to transliterate
-Options: [-s <chars> | --search=<chars>] - String of chars to locate and replace
-Options: [-r <chars> | --replace=<chars>] - String of chars for replacing
-Options: [-d <chars> | --delete=<chars>] - String of chars to delete
-Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
-Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
-
-Examples: ... | $script -k SEQ -s Uu -r Tt - Replacing u with t in SEQ (i.e converting RNA to DNA).
-Examples: ... | $script -k PATTERN -d '.~-' - Removing indels from PATTERN.
--- /dev/null
+=Biopiece: transliterate_vals=
+
+==Synopsis==
+
+Transliterate chars from values in stream.
+
+==Description==
+
+Transliteration is ultra fast search and replace (or search and delete) of characters in values
+and is useful for things such as converting sequence from RNA to DNA or removing indels from patterns.
+
+==Usage==
+
+{{{
+... | transliterate_vals [options]
+}}}
+
+==Options==
+
+{{{
+[-k <string> | --keys=<string>] - List of values to transliterate
+[-s <chars> | --search=<chars>] - String of chars to locate and replace
+[-r <chars> | --replace=<chars>] - String of chars for replacing
+[-d <chars> | --delete=<chars>] - String of chars to delete
+[-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+[-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+}}}
+
+==Examples==
+
+To convert RNA sequence to DNA:
+
+{{{
+transliterate_vals -k SEQ -s Uu -r Tt
+}}}
+
+To remove indels from patterns:
+
+{{{
+transliterate_vals -k PATTERN -d '._-~'
+}}}
+
+==See also==
+
+[transliterate_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+April 2008
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[transliterate_vals] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
+++ /dev/null
-Author: Martin Asser Hansen - Copyright (C) - All rights reserved
-
-Contact: mail@maasha.dk
-
-Date: August 2007
-
-License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
-
-Description: Write records from stream as BED lines.
-
-Usage: ... | $script [options]
-
-Options: [-x | --no_stream] - Do not emit records.
-Options: [-o <file> | --data_out=<file>] - Write result to file.
-Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
-Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
-Options: [-Z | --compress] - Compress output using gzip.
-
-Examples: ... | $script -x -o table.bed - Output data to file 'table.bed'.
-Examples: ... | $script -Z -o table.bed.gz - Output zipped data to file 'table.bed.gz'.
-
-Keys in: REC_TYPE - Either BED,PSL,PATSCAN,BLAST,VMATCH, otherwise generic.
-Keys in: CHR - Chromosome.
-Keys in: CHR_BEG - Chromosome begin position.
-Keys in: CHR_END - Chromosome end position.
-Keys in: Q_ID - Feature name.
-Keys in: SCORE - Score.
-Keys in: STRAND - Strand.
--- /dev/null
+=Biopiece: write_bed=
+
+==Synopsis==
+
+Write BED entries from stream.
+
+==Description==
+
+[write_bed] outputs BED (Browser Extensible Data) records from the stream.
+The BED format consists of up to 12 columns, where the first three are mandatory.
+
+ # CHR - the name of the chromosome.
+ # CHR_BEG - the chromosome begin position.
+ # CHR_END - the chromosome end position.
+ # Q_ID - the name of the feature.
+ # SCORE - a score between 0 and 1000.
+ # STRAND - the orientation of the feature.
+ # THICK_BEG - begin position of 'thick' drawing used for UTRs.
+ # THICK_END - end position of 'thick' drawing used for UTRs.
+ # ITEMRGB - RGB color code for feature.
+ # BLOCKCOUNT - number of exon blocks.
+ # BLOCKSIZES - list of block sizes.
+ # Q_BEGS - list of block begins.
+
+Records can be written to BED format if the REC_TYPE is either:
+
+ * BED - from [read_bed]
+ * PSL - from [read_psl] or [blat_seq]
+ * PATSCAN - from [patscan_seq]
+ * BLAST - from [read_blast_tab] or [blast_seq]
+ * VMATCH - from [vmatch_seq]
+
+... and if the S_ID value begins with 'chr'.
+
+Also, generic records may be output as BED format if the record contains
+CHR, CHR_BEG, and CHR_END keys and if the CHR value begins with 'chr'.
+
+For more about the BED format:
+
+http://genome.ucsc.edu/FAQ/FAQformat#format1
+
+==Usage==
+
+{{{
+... | write_bed [options]
+}}}
+
+==Options==
+
+{{{
+[-x | --no_stream] - Do not emit records.
+[-o <file> | --data_out=<file>] - Write result to file.
+[-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+[-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+[-Z | --compress] - Compress output using gzip.
+}}}
+
+==Examples==
+
+{{{
+... | write_bed -xo test.bed
+}}}
+
+==See also==
+
+[read_bed]
+
+[read_psl]
+
+[blat_seq]
+
+[patscan_seq]
+
+[read_blast_tab]
+
+[blast_seq]
+
+[vmatch_seq]
+
+==Author==
+
+Martin Asser Hansen - Copyright (C) - All rights reserved.
+
+mail@maasha.dk
+
+August 2007
+
+==License==
+
+GNU General Public License version 2
+
+http://www.gnu.org/copyleft/gpl.html
+
+==Help==
+
+[write_bed] is part of the Biopieces framework.
+
+http://code.google.com/p/biopieces/
for ( $i = 0; $i < @{ $block }; $i++ ) {
push @lines, " * $block->[ $i ]->{ 'TEXT' }";
}
-
- push @lines, "";
}
elsif ( $block->[ 0 ]->{ 'FORMAT' } eq "enumerate" )
{
for ( $i = 0; $i < @{ $block }; $i++ ) {
push @lines, " " . ( $i + 1 ) . ". $block->[ $i ]->{ 'TEXT' }";
}
-
- push @lines, "";
}
elsif ( $block->[ 0 ]->{ 'FORMAT' } eq "paragraph" )
{
$line =~ s/\s*$//;
$line =~ s/\s+/ /g;
$line =~ tr/`//d;
- $line =~ s/\[([^\]]+?)\]/$1/g;
- $line =~ s/\*([^\*]+?)\*/&text_bold($1)/ge;
- $line =~ s/_([^_]+?)_/&text_underline($1)/ge;
+ $line =~ s/\[([^\]]+?)\]/&text_underline($1)/ge;
+ $line =~ s/\*([^\*]+?)\*/&text_bold($1)/ge if $line =~ /(^| )\*[^\*]+?\*( |$)/;
+ $line =~ s/_([^_]+?)_/&text_underline($1)/ge if $line =~ /(^| )_[^_]+?_( |$)/;
push @lines, $_ foreach &Maasha::Common::wrap_line( $line, 80 );
}
my ( $txt,
) = @_;
- return colored ( $txt, "magenta" );
+ return colored ( $txt, "bold" );
}