--- /dev/null
+# Copyright (C) 2007-2008 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
--- /dev/null
+This directory contains configuration files for Biopieces.
+
+ bashrc - contains environment settings for bash shell environment
+
+
+Equivalent configuration files may be written for other shells (tcsh, csh, etc.).
+
+
+
+Martin A. Hansen, July 2008
--- /dev/null
+# >>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Environment settings that enable biotools in a bash shell.
+# NOTE: the directory paths below are hard-coded for one user's machine
+# (/Users/m.hansen).
+
+export TOOLS_DIR="/Users/m.hansen/tools" # Contains binaries for BLAST and Vmatch.
+export INST_DIR="/Users/m.hansen/maasha" # Contains scripts and modules.
+export DATA_DIR="/Users/m.hansen/DATA" # Contains genomic data etc.
+export TMP_DIR="/Users/m.hansen/maasha/tmp" # Required temporary directory.
+export LOG_DIR="/Users/m.hansen/maasha/log" # Log directory.
+
+# The BLAST and Vmatch directories are appended to PATH, while the
+# directories under INST_DIR are prepended and so are searched first.
+
+export PATH="$PATH:$TOOLS_DIR/blast-2.2.17/bin:$TOOLS_DIR/vmatch.distribution"
+export PATH="$INST_DIR/bin/:$INST_DIR/perl/bin:$INST_DIR/perl_scripts/:$INST_DIR/biotools:$PATH"
+
+# Append the Perl modules directory to PERL5LIB. The ${VAR:+...} expansion
+# emits the ':' separator only when PERL5LIB is already set and non-empty,
+# so no empty leading entry is created in the search path.
+
+export PERL5LIB="${PERL5LIB:+$PERL5LIB:}$INST_DIR/perl_modules"
+
+# Alias allowing power scripting with biotools: runs a Perl one-liner with
+# read_stream, get_record and put_record imported from Maasha::Biotools.
+
+alias bioscript="perl -MMaasha::Biotools=read_stream,get_record,put_record -e"
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+This directory contains ad hoc data files needed by Biopieces.
+
+ human_cytobands.txt - Cytoband information for plot_karyogram
+ mouse_cytobands.txt - Cytoband information for plot_karyogram
+
+
+Martin A. Hansen, July 2008
--- /dev/null
+#chrom chromStart chromEnd name gieStain
+chr1 0 2300000 p36.33 gneg
+chr1 2300000 5300000 p36.32 gpos25
+chr1 5300000 7100000 p36.31 gneg
+chr1 7100000 9200000 p36.23 gpos25
+chr1 9200000 12600000 p36.22 gneg
+chr1 12600000 16100000 p36.21 gpos50
+chr1 16100000 20300000 p36.13 gneg
+chr1 20300000 23800000 p36.12 gpos25
+chr1 23800000 27800000 p36.11 gneg
+chr1 27800000 30000000 p35.3 gpos25
+chr1 30000000 32200000 p35.2 gneg
+chr1 32200000 34400000 p35.1 gpos25
+chr1 34400000 39600000 p34.3 gneg
+chr1 39600000 43900000 p34.2 gpos25
+chr1 43900000 46500000 p34.1 gneg
+chr1 46500000 51300000 p33 gpos75
+chr1 51300000 56200000 p32.3 gneg
+chr1 56200000 58700000 p32.2 gpos50
+chr1 58700000 60900000 p32.1 gneg
+chr1 60900000 68700000 p31.3 gpos50
+chr1 68700000 69500000 p31.2 gneg
+chr1 69500000 84700000 p31.1 gpos100
+chr1 84700000 88100000 p22.3 gneg
+chr1 88100000 92000000 p22.2 gpos75
+chr1 92000000 94500000 p22.1 gneg
+chr1 94500000 99400000 p21.3 gpos75
+chr1 99400000 102000000 p21.2 gneg
+chr1 102000000 107000000 p21.1 gpos100
+chr1 107000000 111600000 p13.3 gneg
+chr1 111600000 115900000 p13.2 gpos50
+chr1 115900000 117600000 p13.1 gneg
+chr1 117600000 120700000 p12 gpos50
+chr1 120700000 121100000 p11.2 gneg
+chr1 121100000 124300000 p11.1 acen
+chr1 124300000 128000000 q11 acen
+chr1 128000000 142400000 q12 gvar
+chr1 142400000 148000000 q21.1 gneg
+chr1 148000000 149600000 q21.2 gpos50
+chr1 149600000 153300000 q21.3 gneg
+chr1 153300000 154800000 q22 gpos50
+chr1 154800000 157300000 q23.1 gneg
+chr1 157300000 158800000 q23.2 gpos50
+chr1 158800000 163800000 q23.3 gneg
+chr1 163800000 165500000 q24.1 gpos50
+chr1 165500000 169100000 q24.2 gneg
+chr1 169100000 171200000 q24.3 gpos75
+chr1 171200000 174300000 q25.1 gneg
+chr1 174300000 178600000 q25.2 gpos50
+chr1 178600000 184000000 q25.3 gneg
+chr1 184000000 189000000 q31.1 gpos100
+chr1 189000000 192100000 q31.2 gneg
+chr1 192100000 197500000 q31.3 gpos100
+chr1 197500000 205300000 q32.1 gneg
+chr1 205300000 209500000 q32.2 gpos25
+chr1 209500000 212100000 q32.3 gneg
+chr1 212100000 222100000 q41 gpos100
+chr1 222100000 222700000 q42.11 gneg
+chr1 222700000 225100000 q42.12 gpos25
+chr1 225100000 228800000 q42.13 gneg
+chr1 228800000 232700000 q42.2 gpos50
+chr1 232700000 234600000 q42.3 gneg
+chr1 234600000 241700000 q43 gpos75
+chr1 241700000 247249719 q44 gneg
+chr2 0 4300000 p25.3 gneg
+chr2 4300000 7000000 p25.2 gpos50
+chr2 7000000 12800000 p25.1 gneg
+chr2 12800000 17000000 p24.3 gpos75
+chr2 17000000 19100000 p24.2 gneg
+chr2 19100000 23900000 p24.1 gpos75
+chr2 23900000 27700000 p23.3 gneg
+chr2 27700000 29800000 p23.2 gpos25
+chr2 29800000 31900000 p23.1 gneg
+chr2 31900000 36400000 p22.3 gpos75
+chr2 36400000 38400000 p22.2 gneg
+chr2 38400000 41600000 p22.1 gpos50
+chr2 41600000 47600000 p21 gneg
+chr2 47600000 52700000 p16.3 gpos100
+chr2 52700000 54800000 p16.2 gneg
+chr2 54800000 61100000 p16.1 gpos100
+chr2 61100000 64000000 p15 gneg
+chr2 64000000 70500000 p14 gpos50
+chr2 70500000 72600000 p13.3 gneg
+chr2 72600000 73900000 p13.2 gpos50
+chr2 73900000 75400000 p13.1 gneg
+chr2 75400000 83700000 p12 gpos100
+chr2 83700000 91000000 p11.2 gneg
+chr2 91000000 93300000 p11.1 acen
+chr2 93300000 95700000 q11.1 acen
+chr2 95700000 102100000 q11.2 gneg
+chr2 102100000 105300000 q12.1 gpos50
+chr2 105300000 106700000 q12.2 gneg
+chr2 106700000 108600000 q12.3 gpos25
+chr2 108600000 113800000 q13 gneg
+chr2 113800000 118600000 q14.1 gpos50
+chr2 118600000 122100000 q14.2 gneg
+chr2 122100000 129600000 q14.3 gpos50
+chr2 129600000 132200000 q21.1 gneg
+chr2 132200000 134800000 q21.2 gpos25
+chr2 134800000 136600000 q21.3 gneg
+chr2 136600000 142400000 q22.1 gpos100
+chr2 142400000 144700000 q22.2 gneg
+chr2 144700000 148400000 q22.3 gpos100
+chr2 148400000 149600000 q23.1 gneg
+chr2 149600000 150300000 q23.2 gpos25
+chr2 150300000 154600000 q23.3 gneg
+chr2 154600000 159600000 q24.1 gpos75
+chr2 159600000 163500000 q24.2 gneg
+chr2 163500000 169500000 q24.3 gpos75
+chr2 169500000 177700000 q31.1 gneg
+chr2 177700000 180400000 q31.2 gpos50
+chr2 180400000 182700000 q31.3 gneg
+chr2 182700000 189100000 q32.1 gpos75
+chr2 189100000 191600000 q32.2 gneg
+chr2 191600000 197100000 q32.3 gpos75
+chr2 197100000 203500000 q33.1 gneg
+chr2 203500000 205600000 q33.2 gpos50
+chr2 205600000 209100000 q33.3 gneg
+chr2 209100000 215100000 q34 gpos100
+chr2 215100000 221300000 q35 gneg
+chr2 221300000 224900000 q36.1 gpos75
+chr2 224900000 225800000 q36.2 gneg
+chr2 225800000 230700000 q36.3 gpos100
+chr2 230700000 235300000 q37.1 gneg
+chr2 235300000 237000000 q37.2 gpos50
+chr2 237000000 242951149 q37.3 gneg
+chr3 0 3500000 p26.3 gpos50
+chr3 3500000 5500000 p26.2 gneg
+chr3 5500000 8700000 p26.1 gpos50
+chr3 8700000 11500000 p25.3 gneg
+chr3 11500000 12400000 p25.2 gpos25
+chr3 12400000 14700000 p25.1 gneg
+chr3 14700000 23800000 p24.3 gpos100
+chr3 23800000 26400000 p24.2 gneg
+chr3 26400000 30800000 p24.1 gpos75
+chr3 30800000 32100000 p23 gneg
+chr3 32100000 36500000 p22.3 gpos50
+chr3 36500000 39300000 p22.2 gneg
+chr3 39300000 43600000 p22.1 gpos75
+chr3 43600000 44400000 p21.33 gneg
+chr3 44400000 44700000 p21.32 gpos50
+chr3 44700000 51400000 p21.31 gneg
+chr3 51400000 51700000 p21.2 gpos25
+chr3 51700000 54400000 p21.1 gneg
+chr3 54400000 58500000 p14.3 gpos50
+chr3 58500000 63700000 p14.2 gneg
+chr3 63700000 71800000 p14.1 gpos50
+chr3 71800000 74200000 p13 gneg
+chr3 74200000 81800000 p12.3 gpos75
+chr3 81800000 83700000 p12.2 gneg
+chr3 83700000 87200000 p12.1 gpos75
+chr3 87200000 89400000 p11.2 gneg
+chr3 89400000 91700000 p11.1 acen
+chr3 91700000 93200000 q11.1 acen
+chr3 93200000 99800000 q11.2 gvar
+chr3 99800000 101500000 q12.1 gneg
+chr3 101500000 102500000 q12.2 gpos25
+chr3 102500000 104400000 q12.3 gneg
+chr3 104400000 107800000 q13.11 gpos75
+chr3 107800000 109500000 q13.12 gneg
+chr3 109500000 112800000 q13.13 gpos50
+chr3 112800000 115000000 q13.2 gneg
+chr3 115000000 118800000 q13.31 gpos75
+chr3 118800000 120500000 q13.32 gneg
+chr3 120500000 123400000 q13.33 gpos75
+chr3 123400000 125400000 q21.1 gneg
+chr3 125400000 127700000 q21.2 gpos25
+chr3 127700000 131500000 q21.3 gneg
+chr3 131500000 135700000 q22.1 gpos25
+chr3 135700000 137400000 q22.2 gneg
+chr3 137400000 140400000 q22.3 gpos25
+chr3 140400000 144400000 q23 gneg
+chr3 144400000 150400000 q24 gpos100
+chr3 150400000 153500000 q25.1 gneg
+chr3 153500000 156300000 q25.2 gpos50
+chr3 156300000 158100000 q25.31 gneg
+chr3 158100000 159900000 q25.32 gpos50
+chr3 159900000 161200000 q25.33 gneg
+chr3 161200000 169200000 q26.1 gpos100
+chr3 169200000 172500000 q26.2 gneg
+chr3 172500000 177300000 q26.31 gpos75
+chr3 177300000 180600000 q26.32 gneg
+chr3 180600000 184200000 q26.33 gpos75
+chr3 184200000 186000000 q27.1 gneg
+chr3 186000000 187500000 q27.2 gpos25
+chr3 187500000 189400000 q27.3 gneg
+chr3 189400000 193800000 q28 gpos75
+chr3 193800000 199501827 q29 gneg
+chr4 0 3100000 p16.3 gneg
+chr4 3100000 5200000 p16.2 gpos25
+chr4 5200000 10900000 p16.1 gneg
+chr4 10900000 15300000 p15.33 gpos50
+chr4 15300000 18500000 p15.32 gneg
+chr4 18500000 23100000 p15.31 gpos75
+chr4 23100000 27900000 p15.2 gneg
+chr4 27900000 35500000 p15.1 gpos100
+chr4 35500000 40900000 p14 gneg
+chr4 40900000 45600000 p13 gpos50
+chr4 45600000 48700000 p12 gneg
+chr4 48700000 50700000 p11 acen
+chr4 50700000 52400000 q11 acen
+chr4 52400000 59200000 q12 gneg
+chr4 59200000 66300000 q13.1 gpos100
+chr4 66300000 70400000 q13.2 gneg
+chr4 70400000 76500000 q13.3 gpos75
+chr4 76500000 79200000 q21.1 gneg
+chr4 79200000 82600000 q21.21 gpos50
+chr4 82600000 84300000 q21.22 gneg
+chr4 84300000 87100000 q21.23 gpos25
+chr4 87100000 88200000 q21.3 gneg
+chr4 88200000 94000000 q22.1 gpos75
+chr4 94000000 95400000 q22.2 gneg
+chr4 95400000 99100000 q22.3 gpos75
+chr4 99100000 102500000 q23 gneg
+chr4 102500000 107900000 q24 gpos50
+chr4 107900000 114100000 q25 gneg
+chr4 114100000 120600000 q26 gpos75
+chr4 120600000 124000000 q27 gneg
+chr4 124000000 129100000 q28.1 gpos50
+chr4 129100000 131300000 q28.2 gneg
+chr4 131300000 139500000 q28.3 gpos100
+chr4 139500000 141700000 q31.1 gneg
+chr4 141700000 145000000 q31.21 gpos25
+chr4 145000000 147700000 q31.22 gneg
+chr4 147700000 151000000 q31.23 gpos25
+chr4 151000000 155100000 q31.3 gneg
+chr4 155100000 161500000 q32.1 gpos100
+chr4 161500000 164500000 q32.2 gneg
+chr4 164500000 170400000 q32.3 gpos100
+chr4 170400000 172200000 q33 gneg
+chr4 172200000 176600000 q34.1 gpos75
+chr4 176600000 177800000 q34.2 gneg
+chr4 177800000 182600000 q34.3 gpos100
+chr4 182600000 187300000 q35.1 gneg
+chr4 187300000 191273063 q35.2 gpos25
+chr5 0 4400000 p15.33 gneg
+chr5 4400000 6000000 p15.32 gpos25
+chr5 6000000 8200000 p15.31 gneg
+chr5 8200000 15100000 p15.2 gpos50
+chr5 15100000 18500000 p15.1 gneg
+chr5 18500000 23300000 p14.3 gpos100
+chr5 23300000 24700000 p14.2 gneg
+chr5 24700000 29300000 p14.1 gpos100
+chr5 29300000 34400000 p13.3 gneg
+chr5 34400000 38500000 p13.2 gpos25
+chr5 38500000 42400000 p13.1 gneg
+chr5 42400000 45800000 p12 gpos50
+chr5 45800000 47700000 p11 acen
+chr5 47700000 50500000 q11.1 acen
+chr5 50500000 58900000 q11.2 gneg
+chr5 58900000 63000000 q12.1 gpos75
+chr5 63000000 63700000 q12.2 gneg
+chr5 63700000 66500000 q12.3 gpos75
+chr5 66500000 68400000 q13.1 gneg
+chr5 68400000 73300000 q13.2 gpos50
+chr5 73300000 76400000 q13.3 gneg
+chr5 76400000 81300000 q14.1 gpos50
+chr5 81300000 82800000 q14.2 gneg
+chr5 82800000 91900000 q14.3 gpos100
+chr5 91900000 97300000 q15 gneg
+chr5 97300000 102800000 q21.1 gpos100
+chr5 102800000 104500000 q21.2 gneg
+chr5 104500000 109600000 q21.3 gpos100
+chr5 109600000 111500000 q22.1 gneg
+chr5 111500000 113100000 q22.2 gpos50
+chr5 113100000 115200000 q22.3 gneg
+chr5 115200000 121500000 q23.1 gpos100
+chr5 121500000 127300000 q23.2 gneg
+chr5 127300000 130400000 q23.3 gpos100
+chr5 130400000 135400000 q31.1 gneg
+chr5 135400000 139000000 q31.2 gpos25
+chr5 139000000 143100000 q31.3 gneg
+chr5 143100000 147200000 q32 gpos75
+chr5 147200000 152100000 q33.1 gneg
+chr5 152100000 155600000 q33.2 gpos50
+chr5 155600000 159900000 q33.3 gneg
+chr5 159900000 167400000 q34 gpos100
+chr5 167400000 172200000 q35.1 gneg
+chr5 172200000 176500000 q35.2 gpos25
+chr5 176500000 180857866 q35.3 gneg
+chr6 0 2300000 p25.3 gneg
+chr6 2300000 4100000 p25.2 gpos25
+chr6 4100000 7000000 p25.1 gneg
+chr6 7000000 10600000 p24.3 gpos50
+chr6 10600000 11200000 p24.2 gneg
+chr6 11200000 13500000 p24.1 gpos25
+chr6 13500000 15500000 p23 gneg
+chr6 15500000 23500000 p22.3 gpos75
+chr6 23500000 26100000 p22.2 gneg
+chr6 26100000 29900000 p22.1 gpos50
+chr6 29900000 31900000 p21.33 gneg
+chr6 31900000 33600000 p21.32 gpos25
+chr6 33600000 36800000 p21.31 gneg
+chr6 36800000 40600000 p21.2 gpos25
+chr6 40600000 45200000 p21.1 gneg
+chr6 45200000 51100000 p12.3 gpos100
+chr6 51100000 52600000 p12.2 gneg
+chr6 52600000 57200000 p12.1 gpos100
+chr6 57200000 58400000 p11.2 gneg
+chr6 58400000 60500000 p11.1 acen
+chr6 60500000 63400000 q11.1 acen
+chr6 63400000 63500000 q11.2 gneg
+chr6 63500000 70000000 q12 gpos100
+chr6 70000000 75900000 q13 gneg
+chr6 75900000 83900000 q14.1 gpos50
+chr6 83900000 84700000 q14.2 gneg
+chr6 84700000 87500000 q14.3 gpos50
+chr6 87500000 92100000 q15 gneg
+chr6 92100000 98700000 q16.1 gpos100
+chr6 98700000 99900000 q16.2 gneg
+chr6 99900000 104800000 q16.3 gpos100
+chr6 104800000 113900000 q21 gneg
+chr6 113900000 117100000 q22.1 gpos75
+chr6 117100000 118600000 q22.2 gneg
+chr6 118600000 126200000 q22.31 gpos100
+chr6 126200000 127300000 q22.32 gneg
+chr6 127300000 130400000 q22.33 gpos75
+chr6 130400000 131300000 q23.1 gneg
+chr6 131300000 135200000 q23.2 gpos50
+chr6 135200000 139100000 q23.3 gneg
+chr6 139100000 142900000 q24.1 gpos75
+chr6 142900000 145700000 q24.2 gneg
+chr6 145700000 149100000 q24.3 gpos75
+chr6 149100000 152600000 q25.1 gneg
+chr6 152600000 155600000 q25.2 gpos50
+chr6 155600000 160900000 q25.3 gneg
+chr6 160900000 164400000 q26 gpos50
+chr6 164400000 170899992 q27 gneg
+chr7 0 2100000 p22.3 gneg
+chr7 2100000 4500000 p22.2 gpos25
+chr7 4500000 7200000 p22.1 gneg
+chr7 7200000 13300000 p21.3 gpos100
+chr7 13300000 15200000 p21.2 gneg
+chr7 15200000 19500000 p21.1 gpos100
+chr7 19500000 24900000 p15.3 gneg
+chr7 24900000 28000000 p15.2 gpos50
+chr7 28000000 31800000 p15.1 gneg
+chr7 31800000 35600000 p14.3 gpos75
+chr7 35600000 37500000 p14.2 gneg
+chr7 37500000 43300000 p14.1 gpos75
+chr7 43300000 46600000 p13 gneg
+chr7 46600000 49800000 p12.3 gpos75
+chr7 49800000 50900000 p12.2 gneg
+chr7 50900000 53900000 p12.1 gpos75
+chr7 53900000 57400000 p11.2 gneg
+chr7 57400000 59100000 p11.1 acen
+chr7 59100000 61100000 q11.1 acen
+chr7 61100000 66100000 q11.21 gneg
+chr7 66100000 71800000 q11.22 gpos50
+chr7 71800000 77400000 q11.23 gneg
+chr7 77400000 86200000 q21.11 gpos100
+chr7 86200000 88000000 q21.12 gneg
+chr7 88000000 90900000 q21.13 gpos75
+chr7 90900000 92600000 q21.2 gneg
+chr7 92600000 97900000 q21.3 gpos75
+chr7 97900000 104400000 q22.1 gneg
+chr7 104400000 105900000 q22.2 gpos50
+chr7 105900000 107200000 q22.3 gneg
+chr7 107200000 114400000 q31.1 gpos75
+chr7 114400000 117200000 q31.2 gneg
+chr7 117200000 120900000 q31.31 gpos75
+chr7 120900000 123600000 q31.32 gneg
+chr7 123600000 126900000 q31.33 gpos75
+chr7 126900000 129000000 q32.1 gneg
+chr7 129000000 130100000 q32.2 gpos25
+chr7 130100000 132400000 q32.3 gneg
+chr7 132400000 137300000 q33 gpos50
+chr7 137300000 142800000 q34 gneg
+chr7 142800000 147500000 q35 gpos75
+chr7 147500000 152200000 q36.1 gneg
+chr7 152200000 154700000 q36.2 gpos25
+chr7 154700000 158821424 q36.3 gneg
+chr8 0 2200000 p23.3 gneg
+chr8 2200000 6200000 p23.2 gpos75
+chr8 6200000 12700000 p23.1 gneg
+chr8 12700000 19100000 p22 gpos100
+chr8 19100000 23400000 p21.3 gneg
+chr8 23400000 27400000 p21.2 gpos50
+chr8 27400000 29700000 p21.1 gneg
+chr8 29700000 38500000 p12 gpos75
+chr8 38500000 39500000 p11.23 gneg
+chr8 39500000 39900000 p11.22 gpos25
+chr8 39900000 43200000 p11.21 gneg
+chr8 43200000 45200000 p11.1 acen
+chr8 45200000 48100000 q11.1 acen
+chr8 48100000 50400000 q11.21 gneg
+chr8 50400000 52800000 q11.22 gpos75
+chr8 52800000 55600000 q11.23 gneg
+chr8 55600000 61700000 q12.1 gpos50
+chr8 61700000 62400000 q12.2 gneg
+chr8 62400000 66100000 q12.3 gpos50
+chr8 66100000 68100000 q13.1 gneg
+chr8 68100000 70600000 q13.2 gpos50
+chr8 70600000 74000000 q13.3 gneg
+chr8 74000000 78500000 q21.11 gpos100
+chr8 78500000 80300000 q21.12 gneg
+chr8 80300000 84900000 q21.13 gpos75
+chr8 84900000 87200000 q21.2 gneg
+chr8 87200000 93500000 q21.3 gpos100
+chr8 93500000 99100000 q22.1 gneg
+chr8 99100000 101600000 q22.2 gpos25
+chr8 101600000 106100000 q22.3 gneg
+chr8 106100000 110600000 q23.1 gpos75
+chr8 110600000 112200000 q23.2 gneg
+chr8 112200000 117700000 q23.3 gpos100
+chr8 117700000 119200000 q24.11 gneg
+chr8 119200000 122500000 q24.12 gpos50
+chr8 122500000 127300000 q24.13 gneg
+chr8 127300000 131500000 q24.21 gpos50
+chr8 131500000 136500000 q24.22 gneg
+chr8 136500000 140000000 q24.23 gpos75
+chr8 140000000 146274826 q24.3 gneg
+chr9 0 2200000 p24.3 gneg
+chr9 2200000 4600000 p24.2 gpos25
+chr9 4600000 9000000 p24.1 gneg
+chr9 9000000 14100000 p23 gpos75
+chr9 14100000 16600000 p22.3 gneg
+chr9 16600000 18500000 p22.2 gpos25
+chr9 18500000 19900000 p22.1 gneg
+chr9 19900000 25500000 p21.3 gpos100
+chr9 25500000 28100000 p21.2 gneg
+chr9 28100000 32800000 p21.1 gpos100
+chr9 32800000 36300000 p13.3 gneg
+chr9 36300000 38000000 p13.2 gpos25
+chr9 38000000 40200000 p13.1 gneg
+chr9 40200000 42400000 p12 gpos50
+chr9 42400000 46700000 p11.2 gneg
+chr9 46700000 51800000 p11.1 acen
+chr9 51800000 60300000 q11 acen
+chr9 60300000 70000000 q12 gvar
+chr9 70000000 70500000 q13 gneg
+chr9 70500000 72700000 q21.11 gpos25
+chr9 72700000 73100000 q21.12 gneg
+chr9 73100000 79300000 q21.13 gpos50
+chr9 79300000 80300000 q21.2 gneg
+chr9 80300000 83400000 q21.31 gpos50
+chr9 83400000 86100000 q21.32 gneg
+chr9 86100000 89600000 q21.33 gpos50
+chr9 89600000 91000000 q22.1 gneg
+chr9 91000000 93000000 q22.2 gpos25
+chr9 93000000 95600000 q22.31 gneg
+chr9 95600000 98200000 q22.32 gpos25
+chr9 98200000 101600000 q22.33 gneg
+chr9 101600000 107200000 q31.1 gpos100
+chr9 107200000 110300000 q31.2 gneg
+chr9 110300000 113900000 q31.3 gpos25
+chr9 113900000 116700000 q32 gneg
+chr9 116700000 122000000 q33.1 gpos75
+chr9 122000000 125800000 q33.2 gneg
+chr9 125800000 129300000 q33.3 gpos25
+chr9 129300000 132500000 q34.11 gneg
+chr9 132500000 132800000 q34.12 gpos25
+chr9 132800000 134900000 q34.13 gneg
+chr9 134900000 136600000 q34.2 gpos25
+chr9 136600000 140273252 q34.3 gneg
+chrX 0 4300000 p22.33 gneg
+chrX 4300000 6000000 p22.32 gpos50
+chrX 6000000 9500000 p22.31 gneg
+chrX 9500000 17100000 p22.2 gpos50
+chrX 17100000 19200000 p22.13 gneg
+chrX 19200000 21800000 p22.12 gpos50
+chrX 21800000 24900000 p22.11 gneg
+chrX 24900000 29400000 p21.3 gpos100
+chrX 29400000 31500000 p21.2 gneg
+chrX 31500000 37500000 p21.1 gpos100
+chrX 37500000 42300000 p11.4 gneg
+chrX 42300000 47300000 p11.3 gpos75
+chrX 47300000 49700000 p11.23 gneg
+chrX 49700000 54700000 p11.22 gpos25
+chrX 54700000 56600000 p11.21 gneg
+chrX 56600000 59500000 p11.1 acen
+chrX 59500000 65000000 q11.1 acen
+chrX 65000000 65100000 q11.2 gneg
+chrX 65100000 67700000 q12 gpos50
+chrX 67700000 72200000 q13.1 gneg
+chrX 72200000 73800000 q13.2 gpos50
+chrX 73800000 76000000 q13.3 gneg
+chrX 76000000 84500000 q21.1 gpos100
+chrX 84500000 86200000 q21.2 gneg
+chrX 86200000 91900000 q21.31 gpos100
+chrX 91900000 93500000 q21.32 gneg
+chrX 93500000 98200000 q21.33 gpos75
+chrX 98200000 102500000 q22.1 gneg
+chrX 102500000 103600000 q22.2 gpos50
+chrX 103600000 110500000 q22.3 gneg
+chrX 110500000 116800000 q23 gpos75
+chrX 116800000 120700000 q24 gneg
+chrX 120700000 129800000 q25 gpos100
+chrX 129800000 130300000 q26.1 gneg
+chrX 130300000 133500000 q26.2 gpos25
+chrX 133500000 137800000 q26.3 gneg
+chrX 137800000 140100000 q27.1 gpos75
+chrX 140100000 141900000 q27.2 gneg
+chrX 141900000 146900000 q27.3 gpos100
+chrX 146900000 154913754 q28 gneg
+chrY 0 1700000 p11.32 gneg
+chrY 1700000 3300000 p11.31 gpos50
+chrY 3300000 11200000 p11.2 gneg
+chrY 11200000 11300000 p11.1 acen
+chrY 11300000 12500000 q11.1 acen
+chrY 12500000 14300000 q11.21 gneg
+chrY 14300000 19000000 q11.221 gpos50
+chrY 19000000 21300000 q11.222 gneg
+chrY 21300000 25400000 q11.223 gpos50
+chrY 25400000 27200000 q11.23 gneg
+chrY 27200000 57772954 q12 gvar
+chr10 0 3000000 p15.3 gneg
+chr10 3000000 3800000 p15.2 gpos25
+chr10 3800000 6700000 p15.1 gneg
+chr10 6700000 12300000 p14 gpos75
+chr10 12300000 17300000 p13 gneg
+chr10 17300000 19900000 p12.33 gpos75
+chr10 19900000 20500000 p12.32 gneg
+chr10 20500000 22800000 p12.31 gpos75
+chr10 22800000 24100000 p12.2 gneg
+chr10 24100000 28300000 p12.1 gpos50
+chr10 28300000 31400000 p11.23 gneg
+chr10 31400000 34500000 p11.22 gpos25
+chr10 34500000 38800000 p11.21 gneg
+chr10 38800000 40300000 p11.1 acen
+chr10 40300000 42100000 q11.1 acen
+chr10 42100000 46100000 q11.21 gneg
+chr10 46100000 50100000 q11.22 gpos25
+chr10 50100000 53300000 q11.23 gneg
+chr10 53300000 61200000 q21.1 gpos100
+chr10 61200000 64800000 q21.2 gneg
+chr10 64800000 71300000 q21.3 gpos100
+chr10 71300000 74600000 q22.1 gneg
+chr10 74600000 77400000 q22.2 gpos50
+chr10 77400000 82000000 q22.3 gneg
+chr10 82000000 87900000 q23.1 gpos100
+chr10 87900000 89600000 q23.2 gneg
+chr10 89600000 92900000 q23.31 gpos75
+chr10 92900000 94200000 q23.32 gneg
+chr10 94200000 98000000 q23.33 gpos50
+chr10 98000000 99400000 q24.1 gneg
+chr10 99400000 102000000 q24.2 gpos50
+chr10 102000000 103000000 q24.31 gneg
+chr10 103000000 104900000 q24.32 gpos25
+chr10 104900000 105700000 q24.33 gneg
+chr10 105700000 111800000 q25.1 gpos100
+chr10 111800000 114900000 q25.2 gneg
+chr10 114900000 119100000 q25.3 gpos75
+chr10 119100000 121700000 q26.11 gneg
+chr10 121700000 123100000 q26.12 gpos50
+chr10 123100000 127400000 q26.13 gneg
+chr10 127400000 130500000 q26.2 gpos50
+chr10 130500000 135374737 q26.3 gneg
+chr11 0 2800000 p15.5 gneg
+chr11 2800000 10700000 p15.4 gpos50
+chr11 10700000 12600000 p15.3 gneg
+chr11 12600000 16100000 p15.2 gpos50
+chr11 16100000 21600000 p15.1 gneg
+chr11 21600000 26000000 p14.3 gpos100
+chr11 26000000 27200000 p14.2 gneg
+chr11 27200000 31000000 p14.1 gpos75
+chr11 31000000 36400000 p13 gneg
+chr11 36400000 43400000 p12 gpos100
+chr11 43400000 48800000 p11.2 gneg
+chr11 48800000 51400000 p11.12 gpos75
+chr11 51400000 52900000 p11.11 acen
+chr11 52900000 56400000 q11 acen
+chr11 56400000 59700000 q12.1 gpos75
+chr11 59700000 61400000 q12.2 gneg
+chr11 61400000 63100000 q12.3 gpos25
+chr11 63100000 67100000 q13.1 gneg
+chr11 67100000 69200000 q13.2 gpos25
+chr11 69200000 70700000 q13.3 gneg
+chr11 70700000 74900000 q13.4 gpos50
+chr11 74900000 76700000 q13.5 gneg
+chr11 76700000 85300000 q14.1 gpos100
+chr11 85300000 87900000 q14.2 gneg
+chr11 87900000 92300000 q14.3 gpos100
+chr11 92300000 96700000 q21 gneg
+chr11 96700000 101600000 q22.1 gpos100
+chr11 101600000 102400000 q22.2 gneg
+chr11 102400000 110000000 q22.3 gpos100
+chr11 110000000 112800000 q23.1 gneg
+chr11 112800000 115400000 q23.2 gpos50
+chr11 115400000 120700000 q23.3 gneg
+chr11 120700000 123500000 q24.1 gpos50
+chr11 123500000 127400000 q24.2 gneg
+chr11 127400000 130300000 q24.3 gpos50
+chr11 130300000 134452384 q25 gneg
+chr12 0 3100000 p13.33 gneg
+chr12 3100000 5300000 p13.32 gpos25
+chr12 5300000 10000000 p13.31 gneg
+chr12 10000000 12600000 p13.2 gpos75
+chr12 12600000 14800000 p13.1 gneg
+chr12 14800000 19900000 p12.3 gpos100
+chr12 19900000 21200000 p12.2 gneg
+chr12 21200000 26300000 p12.1 gpos100
+chr12 26300000 27700000 p11.23 gneg
+chr12 27700000 30600000 p11.22 gpos50
+chr12 30600000 33200000 p11.21 gneg
+chr12 33200000 35400000 p11.1 acen
+chr12 35400000 36500000 q11 acen
+chr12 36500000 44600000 q12 gpos100
+chr12 44600000 47400000 q13.11 gneg
+chr12 47400000 48400000 q13.12 gpos25
+chr12 48400000 53100000 q13.13 gneg
+chr12 53100000 55200000 q13.2 gpos25
+chr12 55200000 56300000 q13.3 gneg
+chr12 56300000 61400000 q14.1 gpos75
+chr12 61400000 63400000 q14.2 gneg
+chr12 63400000 66000000 q14.3 gpos50
+chr12 66000000 69800000 q15 gneg
+chr12 69800000 74100000 q21.1 gpos75
+chr12 74100000 78700000 q21.2 gneg
+chr12 78700000 85100000 q21.31 gpos100
+chr12 85100000 87500000 q21.32 gneg
+chr12 87500000 91200000 q21.33 gpos100
+chr12 91200000 94800000 q22 gneg
+chr12 94800000 100000000 q23.1 gpos75
+chr12 100000000 102400000 q23.2 gneg
+chr12 102400000 107500000 q23.3 gpos50
+chr12 107500000 110200000 q24.11 gneg
+chr12 110200000 110800000 q24.12 gpos25
+chr12 110800000 112800000 q24.13 gneg
+chr12 112800000 115300000 q24.21 gpos50
+chr12 115300000 116700000 q24.22 gneg
+chr12 116700000 119100000 q24.23 gpos50
+chr12 119100000 124500000 q24.31 gneg
+chr12 124500000 128700000 q24.32 gpos50
+chr12 128700000 132349534 q24.33 gneg
+chr13 0 3800000 p13 gvar
+chr13 3800000 8300000 p12 stalk
+chr13 8300000 13500000 p11.2 gvar
+chr13 13500000 16000000 p11.1 acen
+chr13 16000000 18400000 q11 acen
+chr13 18400000 22200000 q12.11 gneg
+chr13 22200000 24400000 q12.12 gpos25
+chr13 24400000 26700000 q12.13 gneg
+chr13 26700000 27800000 q12.2 gpos25
+chr13 27800000 31100000 q12.3 gneg
+chr13 31100000 32900000 q13.1 gpos50
+chr13 32900000 34700000 q13.2 gneg
+chr13 34700000 39500000 q13.3 gpos75
+chr13 39500000 44300000 q14.11 gneg
+chr13 44300000 45900000 q14.12 gpos25
+chr13 45900000 46200000 q14.13 gneg
+chr13 46200000 48900000 q14.2 gpos50
+chr13 48900000 52200000 q14.3 gneg
+chr13 52200000 57600000 q21.1 gpos100
+chr13 57600000 60500000 q21.2 gneg
+chr13 60500000 64100000 q21.31 gpos75
+chr13 64100000 67200000 q21.32 gneg
+chr13 67200000 72100000 q21.33 gpos100
+chr13 72100000 74200000 q22.1 gneg
+chr13 74200000 76000000 q22.2 gpos50
+chr13 76000000 77800000 q22.3 gneg
+chr13 77800000 86500000 q31.1 gpos100
+chr13 86500000 88800000 q31.2 gneg
+chr13 88800000 93800000 q31.3 gpos100
+chr13 93800000 97000000 q32.1 gneg
+chr13 97000000 98100000 q32.2 gpos25
+chr13 98100000 100500000 q32.3 gneg
+chr13 100500000 103700000 q33.1 gpos100
+chr13 103700000 105800000 q33.2 gneg
+chr13 105800000 109100000 q33.3 gpos100
+chr13 109100000 114142980 q34 gneg
+chr14 0 3100000 p13 gvar
+chr14 3100000 6700000 p12 stalk
+chr14 6700000 13600000 p11.2 gvar
+chr14 13600000 15600000 p11.1 acen
+chr14 15600000 19100000 q11.1 acen
+chr14 19100000 23600000 q11.2 gneg
+chr14 23600000 31800000 q12 gpos100
+chr14 31800000 34100000 q13.1 gneg
+chr14 34100000 35600000 q13.2 gpos50
+chr14 35600000 36900000 q13.3 gneg
+chr14 36900000 41000000 q21.1 gpos100
+chr14 41000000 43200000 q21.2 gneg
+chr14 43200000 48300000 q21.3 gpos100
+chr14 48300000 52300000 q22.1 gneg
+chr14 52300000 54400000 q22.2 gpos25
+chr14 54400000 55800000 q22.3 gneg
+chr14 55800000 61200000 q23.1 gpos75
+chr14 61200000 64000000 q23.2 gneg
+chr14 64000000 67000000 q23.3 gpos50
+chr14 67000000 69300000 q24.1 gneg
+chr14 69300000 72900000 q24.2 gpos50
+chr14 72900000 78400000 q24.3 gneg
+chr14 78400000 82600000 q31.1 gpos100
+chr14 82600000 84000000 q31.2 gneg
+chr14 84000000 88900000 q31.3 gpos100
+chr14 88900000 90500000 q32.11 gneg
+chr14 90500000 92800000 q32.12 gpos25
+chr14 92800000 95400000 q32.13 gneg
+chr14 95400000 100400000 q32.2 gpos50
+chr14 100400000 102200000 q32.31 gneg
+chr14 102200000 103000000 q32.32 gpos50
+chr14 103000000 106368585 q32.33 gneg
+chr15 0 3500000 p13 gvar
+chr15 3500000 7900000 p12 stalk
+chr15 7900000 14100000 p11.2 gvar
+chr15 14100000 17000000 p11.1 acen
+chr15 17000000 18400000 q11.1 acen
+chr15 18400000 23300000 q11.2 gneg
+chr15 23300000 25700000 q12 gpos50
+chr15 25700000 28000000 q13.1 gneg
+chr15 28000000 29000000 q13.2 gpos50
+chr15 29000000 31400000 q13.3 gneg
+chr15 31400000 37900000 q14 gpos75
+chr15 37900000 40700000 q15.1 gneg
+chr15 40700000 41400000 q15.2 gpos25
+chr15 41400000 42700000 q15.3 gneg
+chr15 42700000 47600000 q21.1 gpos75
+chr15 47600000 51100000 q21.2 gneg
+chr15 51100000 55800000 q21.3 gpos75
+chr15 55800000 57100000 q22.1 gneg
+chr15 57100000 61500000 q22.2 gpos25
+chr15 61500000 64900000 q22.31 gneg
+chr15 64900000 65000000 q22.32 gpos25
+chr15 65000000 65300000 q22.33 gneg
+chr15 65300000 70400000 q23 gpos25
+chr15 70400000 73100000 q24.1 gneg
+chr15 73100000 74400000 q24.2 gpos25
+chr15 74400000 76100000 q24.3 gneg
+chr15 76100000 79500000 q25.1 gpos50
+chr15 79500000 83000000 q25.2 gneg
+chr15 83000000 86900000 q25.3 gpos50
+chr15 86900000 92100000 q26.1 gneg
+chr15 92100000 96300000 q26.2 gpos50
+chr15 96300000 100338915 q26.3 gneg
+chr16 0 6300000 p13.3 gneg
+chr16 6300000 10300000 p13.2 gpos50
+chr16 10300000 12500000 p13.13 gneg
+chr16 12500000 14700000 p13.12 gpos50
+chr16 14700000 16700000 p13.11 gneg
+chr16 16700000 20500000 p12.3 gpos50
+chr16 20500000 21700000 p12.2 gneg
+chr16 21700000 27600000 p12.1 gpos50
+chr16 27600000 34400000 p11.2 gneg
+chr16 34400000 38200000 p11.1 acen
+chr16 38200000 40700000 q11.1 acen
+chr16 40700000 45500000 q11.2 gvar
+chr16 45500000 51200000 q12.1 gneg
+chr16 51200000 54500000 q12.2 gpos50
+chr16 54500000 56700000 q13 gneg
+chr16 56700000 65200000 q21 gpos100
+chr16 65200000 69400000 q22.1 gneg
+chr16 69400000 69800000 q22.2 gpos50
+chr16 69800000 73300000 q22.3 gneg
+chr16 73300000 78200000 q23.1 gpos75
+chr16 78200000 80500000 q23.2 gneg
+chr16 80500000 82700000 q23.3 gpos50
+chr16 82700000 85600000 q24.1 gneg
+chr16 85600000 87200000 q24.2 gpos25
+chr16 87200000 88827254 q24.3 gneg
+chr17 0 3600000 p13.3 gneg
+chr17 3600000 6800000 p13.2 gpos50
+chr17 6800000 11200000 p13.1 gneg
+chr17 11200000 15900000 p12 gpos75
+chr17 15900000 22100000 p11.2 gneg
+chr17 22100000 22200000 p11.1 acen
+chr17 22200000 23200000 q11.1 acen
+chr17 23200000 28800000 q11.2 gneg
+chr17 28800000 35400000 q12 gpos50
+chr17 35400000 35600000 q21.1 gneg
+chr17 35600000 37800000 q21.2 gpos25
+chr17 37800000 41900000 q21.31 gneg
+chr17 41900000 44800000 q21.32 gpos25
+chr17 44800000 47600000 q21.33 gneg
+chr17 47600000 54900000 q22 gpos75
+chr17 54900000 55600000 q23.1 gneg
+chr17 55600000 58400000 q23.2 gpos75
+chr17 58400000 59900000 q23.3 gneg
+chr17 59900000 61600000 q24.1 gpos50
+chr17 61600000 64600000 q24.2 gneg
+chr17 64600000 68400000 q24.3 gpos75
+chr17 68400000 72200000 q25.1 gneg
+chr17 72200000 72900000 q25.2 gpos25
+chr17 72900000 78774742 q25.3 gneg
+chr18 0 2900000 p11.32 gneg
+chr18 2900000 7200000 p11.31 gpos50
+chr18 7200000 8500000 p11.23 gneg
+chr18 8500000 10900000 p11.22 gpos25
+chr18 10900000 15400000 p11.21 gneg
+chr18 15400000 16100000 p11.1 acen
+chr18 16100000 17300000 q11.1 acen
+chr18 17300000 23300000 q11.2 gneg
+chr18 23300000 31000000 q12.1 gpos100
+chr18 31000000 35500000 q12.2 gneg
+chr18 35500000 41800000 q12.3 gpos75
+chr18 41800000 46400000 q21.1 gneg
+chr18 46400000 52000000 q21.2 gpos75
+chr18 52000000 54400000 q21.31 gneg
+chr18 54400000 57100000 q21.32 gpos50
+chr18 57100000 59800000 q21.33 gneg
+chr18 59800000 64900000 q22.1 gpos100
+chr18 64900000 66900000 q22.2 gneg
+chr18 66900000 71300000 q22.3 gpos25
+chr18 71300000 76117153 q23 gneg
+chr19 0 6900000 p13.3 gneg
+chr19 6900000 12600000 p13.2 gpos25
+chr19 12600000 13800000 p13.13 gneg
+chr19 13800000 16100000 p13.12 gpos25
+chr19 16100000 19800000 p13.11 gneg
+chr19 19800000 26700000 p12 gvar
+chr19 26700000 28500000 p11 acen
+chr19 28500000 30200000 q11 acen
+chr19 30200000 37100000 q12 gvar
+chr19 37100000 40300000 q13.11 gneg
+chr19 40300000 43000000 q13.12 gpos25
+chr19 43000000 43400000 q13.13 gneg
+chr19 43400000 47800000 q13.2 gpos25
+chr19 47800000 50000000 q13.31 gneg
+chr19 50000000 53800000 q13.32 gpos25
+chr19 53800000 57600000 q13.33 gneg
+chr19 57600000 59100000 q13.41 gpos25
+chr19 59100000 61400000 q13.42 gneg
+chr19 61400000 63811651 q13.43 gpos25
+chr20 0 5000000 p13 gneg
+chr20 5000000 9000000 p12.3 gpos75
+chr20 9000000 11900000 p12.2 gneg
+chr20 11900000 17800000 p12.1 gpos75
+chr20 17800000 21200000 p11.23 gneg
+chr20 21200000 22300000 p11.22 gpos25
+chr20 22300000 25700000 p11.21 gneg
+chr20 25700000 27100000 p11.1 acen
+chr20 27100000 28400000 q11.1 acen
+chr20 28400000 31500000 q11.21 gneg
+chr20 31500000 33900000 q11.22 gpos25
+chr20 33900000 37100000 q11.23 gneg
+chr20 37100000 41100000 q12 gpos75
+chr20 41100000 41600000 q13.11 gneg
+chr20 41600000 45800000 q13.12 gpos25
+chr20 45800000 49200000 q13.13 gneg
+chr20 49200000 54400000 q13.2 gpos75
+chr20 54400000 55900000 q13.31 gneg
+chr20 55900000 57900000 q13.32 gpos50
+chr20 57900000 62435964 q13.33 gneg
+chr21 0 2900000 p13 gvar
+chr21 2900000 6300000 p12 stalk
+chr21 6300000 10000000 p11.2 gvar
+chr21 10000000 12300000 p11.1 acen
+chr21 12300000 13200000 q11.1 acen
+chr21 13200000 15300000 q11.2 gneg
+chr21 15300000 22900000 q21.1 gpos100
+chr21 22900000 25800000 q21.2 gneg
+chr21 25800000 30500000 q21.3 gpos75
+chr21 30500000 34700000 q22.11 gneg
+chr21 34700000 36700000 q22.12 gpos50
+chr21 36700000 38600000 q22.13 gneg
+chr21 38600000 41400000 q22.2 gpos50
+chr21 41400000 46944323 q22.3 gneg
+chr22 0 3000000 p13 gvar
+chr22 3000000 6600000 p12 stalk
+chr22 6600000 9600000 p11.2 gvar
+chr22 9600000 11800000 p11.1 acen
+chr22 11800000 16300000 q11.1 acen
+chr22 16300000 20500000 q11.21 gneg
+chr22 20500000 21800000 q11.22 gpos25
+chr22 21800000 24300000 q11.23 gneg
+chr22 24300000 27900000 q12.1 gpos50
+chr22 27900000 30500000 q12.2 gneg
+chr22 30500000 35900000 q12.3 gpos50
+chr22 35900000 39300000 q13.1 gneg
+chr22 39300000 42600000 q13.2 gpos50
+chr22 42600000 47000000 q13.31 gneg
+chr22 47000000 48200000 q13.32 gpos50
+chr22 48200000 49691432 q13.33 gneg
--- /dev/null
+#chrom chromStart chromEnd name gieStain
+chr1 0 8918386 qA1 gpos100
+chr1 8918386 12386647 qA2 gneg
+chr1 12386647 20314102 qA3 gpos33
+chr1 20314102 22295965 qA4 gneg
+chr1 22295965 31214352 qA5 gpos100
+chr1 31214352 43601000 qB gneg
+chr1 43601000 54996715 qC1.1 gpos66
+chr1 54996715 56483113 qC1.2 gneg
+chr1 56483113 59951375 qC1.3 gpos75
+chr1 59951375 65896965 qC2 gneg
+chr1 65896965 75310818 qC3 gpos33
+chr1 75310818 80760943 qC4 gneg
+chr1 80760943 88192931 qC5 gpos33
+chr1 88192931 100579579 qD gneg
+chr1 100579579 103552375 qE1.1 gpos33
+chr1 103552375 104543306 qE1.2 gneg
+chr1 104543306 113461693 qE2.1 gpos100
+chr1 113461693 114948091 qE2.2 gneg
+chr1 114948091 126839272 qE2.3 gpos100
+chr1 126839272 129812068 qE3 gneg
+chr1 129812068 140712318 qE4 gpos66
+chr1 140712318 148639772 qF gneg
+chr1 148639772 153098966 qG1 gpos100
+chr1 153098966 154089897 qG2 gneg
+chr1 154089897 158549091 qG3 gpos100
+chr1 158549091 161521886 qH1 gneg
+chr1 161521886 165485613 qH2.1 gpos33
+chr1 165485613 166972011 qH2.2 gneg
+chr1 166972011 171431204 qH2.3 gpos33
+chr1 171431204 177376795 qH3 gneg
+chr1 177376795 183322386 qH4 gpos33
+chr1 183322386 189763443 qH5 gneg
+chr1 189763443 197195432 qH6 gpos33
+chr2 0 14052687 qA1 gpos100
+chr2 14052687 16394801 qA2 gneg
+chr2 16394801 29042220 qA3 gpos33
+chr2 29042220 48247559 qB gneg
+chr2 48247559 60426554 qC1.1 gpos100
+chr2 60426554 60894977 qC1.2 gneg
+chr2 60894977 68389744 qC1.3 gpos100
+chr2 68389744 71668704 qC2 gneg
+chr2 71668704 81037162 qC3 gpos66
+chr2 81037162 88531928 qD gneg
+chr2 88531928 101179347 qE1 gpos100
+chr2 101179347 104926730 qE2 gneg
+chr2 104926730 112889920 qE3 gpos33
+chr2 112889920 115700457 qE4 gneg
+chr2 115700457 123663646 qE5 gpos66
+chr2 123663646 131626836 qF1 gneg
+chr2 131626836 134437373 qF2 gpos33
+chr2 134437373 140995294 qF3 gneg
+chr2 140995294 146616369 qG1 gpos100
+chr2 146616369 147553214 qG2 gneg
+chr2 147553214 152237444 qG3 gpos100
+chr2 152237444 158795364 qH1 gneg
+chr2 158795364 163011170 qH2 gpos33
+chr2 163011170 173316474 qH3 gneg
+chr2 173316474 181748087 qH4 gpos33
+chr3 0 18490218 qA1 gpos100
+chr3 18490218 20436557 qA2 gneg
+chr3 20436557 35520683 qA3 gpos66
+chr3 35520683 46712131 qB gneg
+chr3 46712131 56443825 qC gpos100
+chr3 56443825 60823088 qD gneg
+chr3 60823088 69581612 qE1 gpos33
+chr3 69581612 72501120 qE2 gneg
+chr3 72501120 83692569 qE3 gpos100
+chr3 83692569 92937678 qF1 gneg
+chr3 92937678 97316940 qF2.1 gpos33
+chr3 97316940 106075465 qF2.2 gneg
+chr3 106075465 108021804 qF2.3 gpos33
+chr3 108021804 114833990 qF3 gneg
+chr3 114833990 126512023 qG1 gpos100
+chr3 126512023 128458361 qG2 gneg
+chr3 128458361 138190056 qG3 gpos66
+chr3 138190056 143542487 qH1 gneg
+chr3 143542487 147921750 qH2 gpos33
+chr3 147921750 153760766 qH3 gneg
+chr3 153760766 159599783 qH4 gpos33
+chr4 0 14799183 qA1 gpos100
+chr4 14799183 17663541 qA2 gneg
+chr4 17663541 28166187 qA3 gpos100
+chr4 28166187 30075759 qA4 gneg
+chr4 30075759 43442763 qA5 gpos66
+chr4 43442763 51558444 qB1 gneg
+chr4 51558444 54900195 qB2 gpos33
+chr4 54900195 63015876 qB3 gneg
+chr4 63015876 69221985 qC1 gpos33
+chr4 69221985 71608950 qC2 gneg
+chr4 71608950 83543776 qC3 gpos100
+chr4 83543776 89272492 qC4 gneg
+chr4 89272492 96910780 qC5 gpos66
+chr4 96910780 105026461 qC6 gneg
+chr4 105026461 110277784 qC7 gpos66
+chr4 110277784 116961286 qD1 gneg
+chr4 116961286 119825644 qD2.1 gpos33
+chr4 119825644 130328290 qD2.2 gneg
+chr4 130328290 133192648 qD2.3 gpos33
+chr4 133192648 140830936 qD3 gneg
+chr4 140830936 147037045 qE1 gpos100
+chr4 147037045 155630120 qE2 gneg
+chr5 0 14964098 qA1 gpos100
+chr5 14964098 16412236 qA2 gneg
+chr5 16412236 25583780 qA3 gpos66
+chr5 25583780 33789899 qB1 gneg
+chr5 33789899 35720750 qB2 gpos33
+chr5 35720750 50684848 qB3 gneg
+chr5 50684848 58890967 qC1 gpos33
+chr5 58890967 61304531 qC2 gneg
+chr5 61304531 71924213 qC3.1 gpos100
+chr5 71924213 73855065 qC3.2 gneg
+chr5 73855065 78199480 qC3.3 gpos66
+chr5 78199480 82061183 qD gneg
+chr5 82061183 91715440 qE1 gpos100
+chr5 91715440 94129004 qE2 gneg
+chr5 94129004 99921558 qE3 gpos33
+chr5 99921558 102335123 qE4 gneg
+chr5 102335123 108127677 qE5 gpos33
+chr5 108127677 125505339 qF gneg
+chr5 125505339 127436191 qG1.1 gpos33
+chr5 127436191 128401616 qG1.2 gneg
+chr5 128401616 131297893 qG1.3 gpos33
+chr5 131297893 146744704 qG2 gneg
+chr5 146744704 152537259 qG3 gpos33
+chr6 0 16613004 qA1 gpos100
+chr6 16613004 21499181 qA2 gneg
+chr6 21499181 27362595 qA3.1 gpos100
+chr6 27362595 28339830 qA3.2 gneg
+chr6 28339830 34203243 qA3.3 gpos100
+chr6 34203243 41532510 qB1 gneg
+chr6 41532510 44464216 qB2.1 gpos66
+chr6 44464216 45930070 qB2.2 gneg
+chr6 45930070 50816247 qB2.3 gpos66
+chr6 50816247 62543074 qB3 gneg
+chr6 62543074 74269900 qC1 gpos100
+chr6 74269900 76712989 qC2 gneg
+chr6 76712989 85996727 qC3 gpos66
+chr6 85996727 94303229 qD1 gneg
+chr6 94303229 95769082 qD2 gpos33
+chr6 95769082 103098349 qD3 gneg
+chr6 103098349 108473144 qE1 gpos100
+chr6 108473144 109450380 qE2 gneg
+chr6 109450380 116779646 qE3 gpos100
+chr6 116779646 122643059 qF1 gneg
+chr6 122643059 125086148 qF2 gpos33
+chr6 125086148 131926797 qF3 gneg
+chr6 131926797 139256063 qG1 gpos66
+chr6 139256063 142676388 qG2 gneg
+chr6 142676388 149517037 qG3 gpos33
+chr7 0 15943333 qA1 gpos100
+chr7 15943333 19131999 qA2 gneg
+chr7 19131999 29760888 qA3 gpos33
+chr7 29760888 36138221 qB1 gneg
+chr7 36138221 39326888 qB2 gpos33
+chr7 39326888 49955776 qB3 gneg
+chr7 49955776 56864554 qB4 gpos33
+chr7 56864554 63773332 qB5 gneg
+chr7 63773332 74933665 qC gpos100
+chr7 74933665 80779554 qD1 gneg
+chr7 80779554 84499665 qD2 gpos66
+chr7 84499665 94597109 qD3 gneg
+chr7 94597109 104694553 qE1 gpos100
+chr7 104694553 107351775 qE2 gneg
+chr7 107351775 116917775 qE3 gpos33
+chr7 116917775 124357997 qF1 gneg
+chr7 124357997 129140997 qF2 gpos33
+chr7 129140997 144021442 qF3 gneg
+chr7 144021442 147741553 qF4 gpos33
+chr7 147741553 152524553 qF5 gneg
+chr8 0 16228701 qA1.1 gpos100
+chr8 16228701 17183331 qA1.2 gneg
+chr8 17183331 20524534 qA1.3 gpos33
+chr8 20524534 30070829 qA2 gneg
+chr8 30070829 34366662 qA3 gpos33
+chr8 34366662 44867586 qA4 gneg
+chr8 44867586 49163419 qB1.1 gpos66
+chr8 49163419 51072678 qB1.2 gneg
+chr8 51072678 56800455 qB1.3 gpos66
+chr8 56800455 60618973 qB2 gneg
+chr8 60618973 68256009 qB3.1 gpos100
+chr8 68256009 69210638 qB3.2 gneg
+chr8 69210638 75893045 qB3.3 gpos100
+chr8 75893045 81620822 qC1 gneg
+chr8 81620822 86393969 qC2 gpos33
+chr8 86393969 91644432 qC3 gneg
+chr8 91644432 93076376 qC4 gpos33
+chr8 93076376 97372209 qC5 gneg
+chr8 97372209 105009245 qD1 gpos100
+chr8 105009245 105963874 qD2 gneg
+chr8 105963874 112646281 qD3 gpos33
+chr8 112646281 126011094 qE1 gneg
+chr8 126011094 131738871 qE2 gpos33
+chr9 0 14352094 qA1 gpos100
+chr9 14352094 19444773 qA2 gneg
+chr9 19444773 24074481 qA3 gpos33
+chr9 24074481 37963604 qA4 gneg
+chr9 37963604 43982225 qA5.1 gpos66
+chr9 43982225 46297079 qA5.2 gneg
+chr9 46297079 54630553 qA5.3 gpos66
+chr9 54630553 62964027 qB gneg
+chr9 62964027 69445618 qC gpos33
+chr9 69445618 77316122 qD gneg
+chr9 77316122 82408800 qE1 gpos33
+chr9 82408800 84260683 qE2 gneg
+chr9 84260683 90742275 qE3.1 gpos100
+chr9 90742275 91205245 qE3.2 gneg
+chr9 91205245 100464661 qE3.3 gpos100
+chr9 100464661 101390603 qE4 gpos66
+chr9 101390603 107872194 qF1 gneg
+chr9 107872194 110650019 qF2 gpos33
+chr9 110650019 119446464 qF3 gneg
+chr9 119446464 124076172 qF4 gpos33
+chrX 0 15368327 qA1.1 gpos100
+chrX 15368327 17769628 qA1.2 gneg
+chrX 17769628 20651189 qA1.3 gpos33
+chrX 20651189 27374832 qA2 gneg
+chrX 27374832 32657694 qA3.1 gpos66
+chrX 32657694 33618215 qA3.2 gneg
+chrX 33618215 38901077 qA3.3 gpos66
+chrX 38901077 46585241 qA4 gneg
+chrX 46585241 54749664 qA5 gpos66
+chrX 54749664 61473308 qA6 gneg
+chrX 61473308 67716690 qA7.1 gpos66
+chrX 67716690 69157471 qA7.2 gneg
+chrX 69157471 75400854 qA7.3 gpos66
+chrX 75400854 80203456 qB gneg
+chrX 80203456 88848140 qC1 gpos100
+chrX 88848140 89808660 qC2 gneg
+chrX 89808660 98453344 qC3 gpos100
+chrX 98453344 107098028 qD gneg
+chrX 107098028 117183493 qE1 gpos100
+chrX 117183493 118144013 qE2 gneg
+chrX 118144013 131591300 qE3 gpos100
+chrX 131591300 137834682 qF1 gneg
+chrX 137834682 145038586 qF2 gpos33
+chrX 145038586 152242489 qF3 gneg
+chrX 152242489 159446392 qF4 gpos33
+chrX 159446392 166650296 qF5 gneg
+chrY 0 3578074 qA1 gpos100
+chrY 3578074 5665285 qA2 gpos66
+chrY 5665285 7851886 qB gpos33
+chrY 7851886 9442142 qC1 gpos100
+chrY 9442142 10734224 qC2 gpos33
+chrY 10734224 12523262 qC3 gpos100
+chrY 12523262 14411690 qD gpos33
+chrY 14411690 15902555 qE gpos66
+chr10 0 12754055 qA1 gpos100
+chr10 12754055 17659461 qA2 gneg
+chr10 17659461 23545948 qA3 gpos33
+chr10 23545948 33356759 qA4 gneg
+chr10 33356759 41205409 qB1 gpos100
+chr10 41205409 48072977 qB2 gneg
+chr10 48072977 55921626 qB3 gpos100
+chr10 55921626 63770276 qB4 gneg
+chr10 63770276 67694600 qB5.1 gpos100
+chr10 67694600 68185141 qB5.2 gneg
+chr10 68185141 74562168 qB5.3 gpos100
+chr10 74562168 88787845 qC1 gneg
+chr10 88787845 95655414 qC2 gpos33
+chr10 95655414 98598657 qC3 gneg
+chr10 98598657 111352712 qD1 gpos100
+chr10 111352712 124106767 qD2 gneg
+chr10 124106767 129993255 qD3 gpos33
+chr11 0 13021480 qA1 gpos100
+chr11 13021480 17206956 qA2 gneg
+chr11 17206956 21857485 qA3.1 gpos100
+chr11 21857485 25577908 qA3.2 gneg
+chr11 25577908 30228437 qA3.3 gpos100
+chr11 30228437 36274125 qA4 gneg
+chr11 36274125 43249918 qA5 gpos100
+chr11 43249918 47900447 qB1.1 gneg
+chr11 47900447 49760658 qB1.2 gpos33
+chr11 49760658 59991822 qB1.3 gneg
+chr11 59991822 62782139 qB2 gpos33
+chr11 62782139 70688038 qB3 gneg
+chr11 70688038 73943408 qB4 gpos33
+chr11 73943408 81849307 qB5 gneg
+chr11 81849307 90220259 qC gpos100
+chr11 90220259 102311634 qD gneg
+chr11 102311634 110217533 qE1 gpos66
+chr11 110217533 121843856 qE2 gneg
+chr12 0 17766671 qA1.1 gpos100
+chr12 17766671 21320005 qA1.2 gneg
+chr12 21320005 26205839 qA1.3 gpos66
+chr12 26205839 31980007 qA2 gneg
+chr12 31980007 39530843 qA3 gpos33
+chr12 39530843 44416677 qB1 gneg
+chr12 44416677 45305011 qB2 gpos33
+chr12 45305011 52411679 qB3 gneg
+chr12 52411679 66625016 qC1 gpos100
+chr12 66625016 71955017 qC2 gneg
+chr12 71955017 81726686 qC3 gpos100
+chr12 81726686 86168354 qD1 gneg
+chr12 86168354 89277522 qD2 gpos33
+chr12 89277522 96384190 qD3 gneg
+chr12 96384190 107044193 qE gpos100
+chr12 107044193 115483361 qF1 gneg
+chr12 115483361 121257530 qF2 gpos66
+chr13 0 16267960 qA1 gpos100
+chr13 16267960 21197645 qA2 gneg
+chr13 21197645 29578109 qA3.1 gpos66
+chr13 29578109 33028888 qA3.2 gneg
+chr13 33028888 41409353 qA3.3 gpos33
+chr13 41409353 44367164 qA4 gneg
+chr13 44367164 52747628 qA5 gpos33
+chr13 52747628 59156219 qB1 gneg
+chr13 59156219 61621061 qB2 gpos33
+chr13 61621061 69508557 qB3 gneg
+chr13 69508557 78381990 qC1 gpos33
+chr13 78381990 80846832 qC2 gneg
+chr13 80846832 94649950 qC3 gpos100
+chr13 94649950 106481194 qD1 gneg
+chr13 106481194 110424942 qD2.1 gpos33
+chr13 110424942 116340564 qD2.2 gneg
+chr13 116340564 120284312 qD2.3 gpos33
+chr14 0 15023383 qA1 gpos100
+chr14 15023383 19530398 qA2 gneg
+chr14 19530398 30046767 qA3 gpos33
+chr14 30046767 43567812 qB gneg
+chr14 43567812 52081063 qC1 gpos100
+chr14 52081063 55085740 qC2 gneg
+chr14 55085740 60093534 qC3 gpos66
+chr14 60093534 69107564 qD1 gneg
+chr14 69107564 73113800 qD2 gpos33
+chr14 73113800 85132507 qD3 gneg
+chr14 85132507 89138743 qE1 gpos66
+chr14 89138743 99154332 qE2.1 gpos100
+chr14 99154332 100155891 qE2.2 gneg
+chr14 100155891 107667583 qE2.3 gpos100
+chr14 107667583 111173039 qE3 gneg
+chr14 111173039 121188628 qE4 gpos100
+chr14 121188628 125194864 qE5 gneg
+chr15 0 16413299 qA1 gpos100
+chr15 16413299 24164024 qA2 gneg
+chr15 24164024 29635124 qB1 gpos33
+chr15 29635124 31914749 qB2 gneg
+chr15 31914749 42856949 qB3.1 gpos100
+chr15 42856949 44680649 qB3.2 gneg
+chr15 44680649 49695824 qB3.3 gpos66
+chr15 49695824 53343224 qC gneg
+chr15 53343224 66109124 qD1 gpos100
+chr15 66109124 68388749 qD2 gneg
+chr15 68388749 77051324 qD3 gpos66
+chr15 77051324 83434274 qE1 gneg
+chr15 83434274 86625749 qE2 gpos33
+chr15 86625749 95288324 qE3 gneg
+chr15 95288324 100759424 qF1 gpos66
+chr15 100759424 101671274 qF2 gneg
+chr15 101671274 103494974 qF3 gpos33
+chr16 0 15450152 qA1 gpos100
+chr16 15450152 16386525 qA2 gneg
+chr16 16386525 20600202 qA3 gpos33
+chr16 20600202 26218440 qB1 gneg
+chr16 26218440 32304863 qB2 gpos33
+chr16 32304863 38391287 qB3 gneg
+chr16 38391287 44945897 qB4 gpos33
+chr16 44945897 53841439 qB5 gneg
+chr16 53841439 58055117 qC1.1 gpos66
+chr16 58055117 58991490 qC1.2 gneg
+chr16 58991490 66950659 qC1.3 gpos66
+chr16 66950659 70696150 qC2 gneg
+chr16 70696150 79123506 qC3.1 gpos100
+chr16 79123506 79591692 qC3.2 gneg
+chr16 79591692 91764540 qC3.3 gpos100
+chr16 91764540 98319150 qC4 gneg
+chr17 0 13984976 qA1 gpos100
+chr17 13984976 16170128 qA2 gneg
+chr17 16170128 17481220 qA3.1 gpos33
+chr17 17481220 21851525 qA3.2 gneg
+chr17 21851525 31466196 qA3.3 gpos66
+chr17 31466196 40206806 qB1 gneg
+chr17 40206806 41517898 qB2 gpos33
+chr17 41517898 45888203 qB3 gneg
+chr17 45888203 55939905 qC gpos66
+chr17 55939905 60310210 qD gneg
+chr17 60310210 68176759 qE1.1 gpos100
+chr17 68176759 69050820 qE1.2 gneg
+chr17 69050820 73421125 qE1.3 gpos100
+chr17 73421125 78665491 qE2 gneg
+chr17 78665491 83035796 qE3 gpos33
+chr17 83035796 89154223 qE4 gneg
+chr17 89154223 95272651 qE5 gpos33
+chr18 0 19420992 qA1 gpos100
+chr18 19420992 29553684 qA2 gneg
+chr18 29553684 35464421 qB1 gpos66
+chr18 35464421 37153203 qB2 gneg
+chr18 37153203 45597113 qB3 gpos100
+chr18 45597113 49819068 qC gneg
+chr18 49819068 54041023 qD1 gpos100
+chr18 54041023 54463218 qD2 gneg
+chr18 54463218 60796150 qD3 gpos100
+chr18 60796150 67973474 qE1 gneg
+chr18 67973474 75150797 qE2 gpos33
+chr18 75150797 83594707 qE3 gneg
+chr18 83594707 90772031 qE4 gpos33
+chr19 0 16655891 qA gpos100
+chr19 16655891 25593199 qB gneg
+chr19 25593199 34936748 qC1 gpos66
+chr19 34936748 38186678 qC2 gneg
+chr19 38186678 47530227 qC3 gpos66
+chr19 47530227 51592639 qD1 gneg
+chr19 51592639 58904982 qD2 gpos33
+chr19 58904982 61342430 qD3 gneg
--- /dev/null
+#LyX 1.5.1 created this file. For more info see http://www.lyx.org/
+\lyxformat 276
+\begin_document
+\begin_header
+\textclass scrartcl
+\begin_preamble
+\usepackage[colorlinks=true, urlcolor=blue, linkcolor=black]{hyperref}
+\end_preamble
+\language english
+\inputencoding auto
+\font_roman default
+\font_sans default
+\font_typewriter default
+\font_default_family default
+\font_sc false
+\font_osf false
+\font_sf_scale 100
+\font_tt_scale 100
+\graphics default
+\paperfontsize default
+\spacing single
+\papersize default
+\use_geometry false
+\use_amsmath 1
+\use_esint 1
+\cite_engine basic
+\use_bibtopic false
+\paperorientation portrait
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation skip
+\defskip medskip
+\quotes_language english
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tracking_changes false
+\output_changes false
+\author ""
+\author ""
+\end_header
+
+\begin_body
+
+\begin_layout Title
+Biopieces Cookbook
+\end_layout
+
+\begin_layout Author
+Martin Asser Hansen
+\end_layout
+
+\begin_layout Publishers
+John Mattick Group
+\newline
+Institute for Molecular Bioscience
+\newline
+University of Queensland
+\newline
+Australia
+\newline
+E-mail: mail@maasha.dk
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+
+\backslash
+thispagestyle{empty}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
+\newpage
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset LatexCommand tableofcontents
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset FloatList figure
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
+\newpage
+
+\end_layout
+
+\begin_layout Section
+Introduction
+\end_layout
+
+\begin_layout Standard
+Biopieces is a collection of bioinformatic tools that can be linked together
+ (piped as we shall call it) in a very flexible manner to perform both simple
+ and complex tasks.
+ The fundamental idea is that biopieces work on a data stream that will
+ only terminate at the end of an analysis and that this data stream can
+ be passed through several different biopieces, each performing one specific
+ task.
+ The advantage of this approach is that a user can perform simple and complex
+ tasks without having to write advanced code.
+ Moreover, since the data format used to pass data between biopieces is
+ text based, biopieces can be written by different developers in their favorite
+ programming language --- and still the biopieces will be able to work together.
+\end_layout
+
+\begin_layout Standard
+In the simplest form biopieces can be piped together on the command line
+ like this (using the pipe character '|'):
+\end_layout
+
+\begin_layout LyX-Code
+read_data | calculate_something | write_result
+\end_layout
+
+\begin_layout Standard
+However, a more comprehensive analysis could be composed:
+\end_layout
+
+\begin_layout LyX-Code
+read_data | select_entries | convert_entries | search_database
+\end_layout
+
+\begin_layout LyX-Code
+evaluate_results | plot_diagram | plot_another_diagram |
+\end_layout
+
+\begin_layout LyX-Code
+load_to_database
+\end_layout
+
+\begin_layout Standard
+The data stream that is piped through the biopieces consists of records
+ of key/value pairs in the same way a hash does in order to keep as simple
+ a structure as possible.
+ An example record can be seen below:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+REC_TYPE: PATSCAN
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+MATCH: AGATCAAGTG
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+S_BEG: 7
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+S_END: 16
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ALIGN_LEN: 9
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+S_ID: piR-t6
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+STRAND: +
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+PATTERN: AGATCAAGTG
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+---
+\end_layout
+
+\begin_layout Standard
+The '
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+' denotes the delimiter of the records, and each key is a word followed
+ by a ':' and a white-space and then the value.
+ By convention the biopieces only use upper case keys (a list of used keys
+ can be seen in Appendix\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sec:Keys"
+
+\end_inset
+
+).
+ Since the records basically are hash structures this means that the keys
+ in the stream are unordered, and in the above example it is pure coincidence
+ that S_BEG is displayed before S_END; however, when the order of the keys
+ is important, the biopieces will automagically see to that.
+\end_layout
+
+\begin_layout Standard
+All of the biopieces are able to read and write a data stream to and from
+ file as long as the records are in the biopieces format.
+ This means that if you are undertaking a lengthy analysis where one of
+ the steps is time consuming, you may save the stream after this step, and
+ subsequently start one or more analyses from that last step
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+It is a goal that the biopieces at some point will be able to dump the data
+ stream to file in case one of the tools fails critically.
+\end_layout
+
+\end_inset
+
+.
+ If you are running a lengthy analysis it is highly recommended that you
+ create a small test sample of the data and run that through the pipeline
+ --- and once you are satisfied with the result proceed with the full data
+ set (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-select-a-few-records"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Standard
+All of the biopieces can be supplied with long arguments prefixed with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+ switches or single character arguments prefixed with - switches that can
+ be grouped together (e.g.
+ -xok).
+ In this cookbook only the long switches are used to emphasize what these
+ switches do.
+\end_layout
+
+\begin_layout Section
+Setup
+\end_layout
+
+\begin_layout Standard
+In order to get the biopieces to work, you need to add environment settings
+ to include the code binaries, scripts, and modules that constitute the
+ biopieces package.
+ Assuming that you are using bash, add the following to your '~/.bashrc'
+ file using your favorite editor.
+ After the changes have been saved you need to either run 'source ~/.bashrc'
+ or log in again.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+if [ -f "/home/m.hansen/maasha/conf/bashrc" ]; then
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ source "/home/m.hansen/maasha/conf/bashrc"
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+fi
+\end_layout
+
+\begin_layout Section
+Getting Started
+\end_layout
+
+\begin_layout Standard
+The biopiece
+\series bold
+list_biopieces
+\series default
+ lists all the biopieces along with a description:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+list_biopieces
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+align_seq Align sequences in stream using Muscle.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+analyze_seq Analysis the residue composition of each sequence
+ in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+analyze_vals Determine type, count, min, max, sum and mean for
+ values in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+blast_seq BLAST sequences in stream against a specified database.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+blat_seq BLAT sequences in stream against a specified genome.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+complement_seq Complement sequences in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_records Count the number of records in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_seq Count sequences in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_vals Count the number of times values of given keys exists
+ in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+create_blast_db Create a BLAST database from sequences in stream for
+ use with BLAST.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+...
+\end_layout
+
+\begin_layout Standard
+To list the biopieces for writing different formats, you can use unix's
+ grep like this:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+list_biopieces | grep write
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_align Write aligned sequences in pretty alignment format.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_bed Write records from stream as BED lines.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_blast Write BLAST records from stream in BLAST tabular format
+ (-m8 and 9).
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_fasta Write sequences in FASTA format.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_psl Write records from stream in PSL format.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_tab Write records from stream as tab separated table.
+\end_layout
+
+\begin_layout Standard
+In order to find out how a specific biopiece works, you just type the program
+ name without any arguments and press return and the usage of the biopiece
+ will be displayed.
+ E.g.
+
+\series bold
+read_fasta
+\series default
+ <return>:
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Frameless
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Program name: read_fasta
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Contact: mail@maasha.dk
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Date: August 2007
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/
+gpl.html)
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Description: Read FASTA entries.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Usage: read_fasta [options] -i <FASTA file(s)>
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Options:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files
+ to read.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-n <int> | --num=<int>] - Limit number of records to read.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-I <file> | --stream_in=<file>] - Read input stream from file
+ - Default=STDIN
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-O <file> | --stream_out=<file>] - Write output stream to file
+ - Default=STDOUT
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Examples:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i test.fna - Read FASTA entries from file.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i test1.fna,test2.fna - Read FASTA entries from files.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i '*.fna' - Read FASTA entries from files.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i test.fna -n 10 - Read first 10 FASTA entries from
+ file.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+The Data Stream
+\end_layout
+
+\begin_layout Subsection
+How to read the data stream from file?
+\begin_inset LatexCommand label
+name "sub:How-to-read-stream"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+You want to read a data stream that you previously have saved to file in
+ biopieces format.
+ This can be done implicitly or explicitly.
+ The implicit way uses the 'stdout' stream of the Unix terminal:
+\end_layout
+
+\begin_layout LyX-Code
+cat <file> | <biopiece>
+\end_layout
+
+\begin_layout Standard
+cat is the Unix command that reads a file and outputs the result to 'stdout'
+ --- which in this case is piped to any biopiece represented by <biopiece>.
+ It is also possible to read the data stream using '<' to redirect the file
+ into the 'stdin' stream of the biopiece like this:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> < <file>
+\end_layout
+
+\begin_layout Standard
+However, that will not work if you pipe more biopieces together.
+ Then it is much safer to read the stream from a file explicitly like this:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --stream_in=<file>
+\end_layout
+
+\begin_layout Standard
+Here the filename <file> is explicitly given to the biopiece <biopiece>
+ with the switch
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_in.
+ This switch works with all biopieces.
+ It is also possible to read in data from multiple sources by repeating
+ the explicit read step:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --stream_in=<file1> | <biopiece> --stream_in=<file2>
+\end_layout
+
+\begin_layout Subsection
+How to write the data stream to file?
+\begin_inset LatexCommand label
+name "sub:How-to-write-stream"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+In order to save the output stream from a biopiece to file, so you can read
+ in the stream again at a later time, you can do one of two things:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> > <file>
+\end_layout
+
+\begin_layout Standard
+All the biopieces write the data stream to 'stdout' by default, which can
+ be written to a file by redirecting 'stdout' to file using '>'; however,
+ if one of the biopieces for writing other formats is used then both
+ the biopieces records and the result output will go to 'stdout'
+ in a mixture causing havoc! To avoid this you must use the switch 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_out that explicitly tells the biopiece to write the output stream
+ to file:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --stream_out=<file>
+\end_layout
+
+\begin_layout Standard
+The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_out switch works with all biopieces.
+\end_layout
+
+\begin_layout Subsection
+How to terminate the data stream?
+\end_layout
+
+\begin_layout Standard
+The data stream never stops unless the user saves the stream to file or ends it
+ by supplying the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch that will terminate the stream:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --no_stream
+\end_layout
+
+\begin_layout Standard
+The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch only works with those biopieces where it makes sense that
+ the user might want to terminate the data stream, 
+\emph on
+i.e
+\emph default
+.
+ after an analysis step where the user wants to output the result, but not
+ the data stream.
+\end_layout
+
+\begin_layout Subsection
+How to write my results to file?
+\begin_inset LatexCommand label
+name "sub:How-to-write-result"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Saving the result of an analysis to file can be done implicitly or explicitly.
+ The implicit way:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --no_stream > <file>
+\end_layout
+
+\begin_layout Standard
+If you use '>' to redirect 'stdout' to file then it is important to use
+ the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch to avoid writing a mix of biopieces records and result
+ to the same file causing havoc.
+ The safe way is to use the
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out switch which explicitly tells the biopiece to write the result
+ to a given file:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --result_out=<file>
+\end_layout
+
+\begin_layout Standard
+Using the above method will not terminate the stream, so it is possible
+ to pipe that into another biopiece generating different results:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece1> --result_out=<file1> | <biopiece2> --result_out=<file2>
+\end_layout
+
+\begin_layout Standard
+And still the data stream will continue unless terminated with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --result_out=<file> --no_stream
+\end_layout
+
+\begin_layout Standard
+Or written to file implicitly or explicitly 
+\begin_inset LatexCommand eqref
+reference "sub:How-to-write-result"
+
+\end_inset
+
+.
+ The explicit way:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --result_out=<file1> --stream_out=<file2>
+\end_layout
+
+\begin_layout Subsection
+How to read data from multiple sources?
+\end_layout
+
+\begin_layout Standard
+To read multiple data sources, with the same type or different type of data
+ do:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece1> --data_in=<file1> | <biopiece2> --data_in=<file2>
+\end_layout
+
+\begin_layout Standard
+where type is the data type a specific biopiece reads.
+\end_layout
+
+\begin_layout Section
+Reading input
+\end_layout
+
+\begin_layout Subsection
+How to read biopieces input?
+\end_layout
+
+\begin_layout Standard
+See
+\begin_inset LatexCommand eqref
+reference "sub:How-to-read-stream"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Subsection
+How to read in data?
+\end_layout
+
+\begin_layout Standard
+Data in different formats can be read with the appropriate biopiece for
+ that format.
+ The biopieces are typically named 'read_<data type>' such as 
+\series bold
+read_fasta
+\series default
+,
+\series bold
+read_bed
+\series default
+,
+\series bold
+read_tab
+\series default
+, etc., and all behave in a similar manner.
+ Data can be read by supplying the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+data_in switch and a file name to the file containing the data:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --data_in=<file>
+\end_layout
+
+\begin_layout Standard
+It is also possible to read in a saved biopieces stream (see
+\begin_inset LatexCommand ref
+reference "sub:How-to-read-stream"
+
+\end_inset
+
+) as well as reading data in one go:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --stream_in=<file1> --data_in=<file2>
+\end_layout
+
+\begin_layout Standard
+If you want to read data from several files you can do this:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --data_in=<file1> | <biopiece> --data_in=<file2>
+\end_layout
+
+\begin_layout Standard
+If you have several data files you can read in all explicitly with a comma
+ separated list:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --data_in=file1,file2,file3
+\end_layout
+
+\begin_layout Standard
+And it is also possible to use file globbing
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+using the short option will only work if you quote the argument -i '*.fna'
+\end_layout
+
+\end_inset
+
+:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --data_in=*.fna
+\end_layout
+
+\begin_layout Standard
+Or in a combination:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece> --data_in=file1,/dir/*.fna
+\end_layout
+
+\begin_layout Standard
+Finally, it is possible to read in data in different formats using the appropria
+te biopiece for each format:
+\end_layout
+
+\begin_layout LyX-Code
+<biopiece1> --data_in=<file1> | <biopiece2> --data_in=<file2> ...
+\end_layout
+
+\begin_layout Subsection
+How to read FASTA input?
+\end_layout
+
+\begin_layout Standard
+Sequences in FASTA format can be read explicitly using
+\series bold
+read_fasta
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file>
+\end_layout
+
+\begin_layout Subsection
+How to read alignment input?
+\end_layout
+
+\begin_layout Standard
+If your alignment is FASTA formatted then you can use 
+\series bold
+read_align
+\series default
+.
+ It is also possible to use
+\series bold
+read_fasta
+\series default
+ since the data is FASTA formatted, however, with
+\series bold
+read_fasta
+\series default
+ the key ALIGN will be omitted.
+ The ALIGN key is used to determine which sequences belong to what alignment
+ which is required for
+\series bold
+write_align
+\series default
+.
+\end_layout
+
+\begin_layout LyX-Code
+read_align --data_in=<file>
+\end_layout
+
+\begin_layout Subsection
+How to read tabular input?
+\begin_inset LatexCommand label
+name "sub:How-to-read-table"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Tabular input can be read with
+\series bold
+read_tab
+\series default
+ which will read in all rows and chosen columns (separated by a given delimiter)
+ from a table in text format.
+\end_layout
+
+\begin_layout Standard
+The table below:
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0">
+<column alignment="left" valignment="top" width="0">
+<column alignment="left" valignment="top" width="0">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Human
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+ATACGTCAG
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+23524
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Dog
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+AGCATGAC
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+2442
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Mouse
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+GACTG
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+234
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Cat
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+AAATGCA
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+2342
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Can be read using the command:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file>
+\end_layout
+
+\begin_layout Standard
+Which will result in four records, one for each row, where the keys V0,
+ V1, V2 are the default keys for the organism, sequence, and count, respectively.
+ It is possible to select a subset of columns to read by using the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+cols switch which takes a comma separated list of column numbers (first
+ column is designated 0) as argument.
+ So to read in only the sequence and the count so that the count comes before
+ the sequence do:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file> --cols=2,1
+\end_layout
+
+\begin_layout Standard
+It is also possible to name the columns with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys switch:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file> --cols=2,1 --keys=COUNT,SEQ
+\end_layout
+
+\begin_layout Subsection
+How to read BED input?
+\end_layout
+
+\begin_layout Standard
+The BED (Browser Extensible Data
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://genome.ucsc.edu/FAQ/FAQformat"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+) format is a tabular format for data pertaining to one of the Eukaryotic
+ genomes in the UCSC genome browser
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://genome.ucsc.edu/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+.
+ The BED format consists of up to 12 columns, where the first three are
+ mandatory CHR, CHR_BEG, and CHR_END.
+ The mandatory columns and any of the optional columns can all be read in
+ easily with the
+\series bold
+read_bed
+\series default
+ biopiece.
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<file>
+\end_layout
+
+\begin_layout Standard
+It is also possible to read the BED file with
+\series bold
+read_tab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-read-table"
+
+\end_inset
+
+), however, that will be more cumbersome because you need to specify the
+ keys:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file> --keys=CHR,CHR_BEG,CHR_END ...
+\end_layout
+
+\begin_layout Subsection
+How to read PSL input?
+\end_layout
+
+\begin_layout Standard
+The PSL format is the output from BLAT and contains 21 mandatory fields
+ that can be read with
+\series bold
+read_psl
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+read_psl --data_in=<file>
+\end_layout
+
+\begin_layout Section
+Writing output
+\end_layout
+
+\begin_layout Standard
+All result output can be written explicitly to file using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out switch which all result generating biopieces have.
+ It is also possible to write the result to file implicitly by directing
+ 'stdout' to file using '>', however, that requires the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch to prevent a mixture of data stream and results in the file.
+ The explicit (and safe) way:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | <biopiece> --result_out=<file>
+\end_layout
+
+\begin_layout Standard
+The implicit way:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | <biopiece> --no_stream > <file>
+\end_layout
+
+\begin_layout Subsection
+How to write biopieces output?
+\end_layout
+
+\begin_layout Standard
+See
+\begin_inset LatexCommand eqref
+reference "sub:How-to-write-stream"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Subsection
+How to write FASTA output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-fasta"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+FASTA output can be written with
+\series bold
+write_fasta
+\series default
+.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_fasta --result_out=<file>
+\end_layout
+
+\begin_layout Standard
+It is also possible to wrap the sequences to a given width using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+wrap switch although wrapping of sequence is generally an evil thing:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_fasta --no_stream --wrap=80
+\end_layout
+
+\begin_layout Subsection
+How to write alignment output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-alignment"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Pretty alignments with ruler
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+'.' for every 10 residues, ':' for every 50, and '|' for every 100
+\end_layout
+
+\end_inset
+
+ and consensus sequence
+\begin_inset Note Note
+status collapsed
+
+\begin_layout Standard
+which reminds me to make that an option.
+\end_layout
+
+\end_inset
+
+ can be created with
+\series bold
+write_align
+\series default
+, which also has the optional 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+wrap switch to break the alignment into blocks of a given width:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_align --result_out=<file> --wrap=80
+\end_layout
+
+\begin_layout Standard
+If the number of sequences in the alignment is 2 then a pairwise alignment
+ will be output otherwise a multiple alignment.
+ And if the sequence type, determined automagically, is protein, then residues
+ and symbols (+,\InsetSpace ~
+:,\InsetSpace ~
+.) will be used to show consensus according to the Blosum62
+ matrix.
+\end_layout
+
+\begin_layout Subsection
+How to write tabular output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-tab"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Outputting the data stream as a table can be done with
+\series bold
+write_tab
+\series default
+, which will generate one row per record with the values as columns.
+ If you supply the optional
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+comment switch, then the first row in the table will be a 'comment' line
+ prefixed with a '#':
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --comment
+\end_layout
+
+\begin_layout Standard
+You can also change the delimiter from the default (tab) to
+\emph on
+e.g.
+
+\emph default
+ ',':
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --delimit=','
+\end_layout
+
+\begin_layout Standard
+If you want the values output in a specific order you have to supply a comma
+ separated list using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys switch that will print only those keys in that order:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --keys=SEQ_NAME,COUNT
+\end_layout
+
+\begin_layout Standard
+Alternatively, if you have some keys that you don't want in the tabular
+ output, use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_keys switch.
+ So to print all keys except SEQ and SEQ_TYPE do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --no_keys=SEQ,SEQ_TYPE
+\end_layout
+
+\begin_layout Standard
+Finally, if you have a stream containing a mix of different records types,
+
+\emph on
+e.g.
+
+\emph default
+ records with sequences and records with matches, then you can use
+\series bold
+write_tab
+\series default
+ to output all the records in tabular format, however, the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+comment,
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys, and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_keys switches will only respond to records of the first type encountered.
+ The reason is that outputting mixed records is probably not what you want
+ anyway, and you should remove all the unwanted records from the stream
+ before outputting the table:
+\series bold
+grab
+\series default
+ is your friend (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-grab"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to write a BED output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-BED"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Data in BED format can be output if the records contain the mandatory keys
+ CHR, CHR_BEG, and CHR_END using
+\series bold
+write_bed
+\series default
+.
+ If the optional keys are also present, they will be output as well:
+\end_layout
+
+\begin_layout LyX-Code
+write_bed --result_out=<file>
+\end_layout
+
+\begin_layout Subsection
+How to write PSL output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-PSL"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Data in PSL format can be output using
+\series bold
+write_psl
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+write_psl --result_out=<file>
+\end_layout
+
+\begin_layout Section
+Manipulating Records
+\end_layout
+
+\begin_layout Subsection
+How to select a few records?
+\begin_inset LatexCommand label
+name "sub:How-to-select-a-few-records"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+To quickly get an overview of your data you can limit the data stream to
+ show a few records.
+ This is also very useful to test the pipeline with a few records if you are
+ setting up a complex analysis using several biopieces.
+ That way you can inspect that all goes well before analyzing and waiting
+ for the full data set.
+ All of the read_<type> biopieces have the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num switch which will take a number as argument and only that number of
+ records will be read.
+ So to read in the first 10 FASTA entries from a file:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna --num=10
+\end_layout
+
+\begin_layout Standard
+Another way of doing this is to use
+\series bold
+head_records
+\series default
+ which will limit the stream to show the first 10 records (default):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | head_records
+\end_layout
+
+\begin_layout Standard
+Using
+\series bold
+head_records
+\series default
+ directly after one of the read_<type> biopieces will be a lot slower than
+ using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num switch with the read_<type> biopieces, however,
+\series bold
+head_records
+\series default
+ can also be used to limit the output from all the other biopieces.
+ It is also possible to give
+\series bold
+head_records
+\series default
+ a number of records to show using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num switch.
+ So to display the first 100 records do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | head_records --num=100
+\end_layout
+
+\begin_layout Subsection
+How to select random records?
+\begin_inset LatexCommand label
+name "sub:How-to-select-random-records"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+If you want to inspect a number of random records from the stream this can
+ be done with the
+\series bold
+random_records
+\series default
+ biopiece.
+ So if you have 1 million records in the stream and you want to select 1000
+ random records do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | random_records --num=1000
+\end_layout
+
+\begin_layout Subsection
+How to count all records in the data stream?
+\end_layout
+
+\begin_layout Standard
+To count all the records in the data stream use
+\series bold
+count_records
+\series default
+, which adds one record (which is not included in the count) to the data
+ stream.
+ So to count the number of sequences in a FASTA file you can do this:
+\end_layout
+
+\begin_layout LyX-Code
+cat test.fna | read_fasta | count_records --no_stream
+\end_layout
+
+\begin_layout Standard
+Which will write the last record containing the count to 'stdout':
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_records: 630
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+---
+\end_layout
+
+\begin_layout Standard
+It is also possible to write the count to file using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out switch.
+\end_layout
+
+\begin_layout Subsection
+How to get the length of record values?
+\begin_inset LatexCommand label
+name "sub:How-to-get-value_length"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Use the
+\series bold
+length_vals
+\series default
+ biopiece to get the length of each value for a comma separated list of
+ keys:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | length_vals --keys=HIT,PATTERN
+\end_layout
+
+\begin_layout Subsection
+How to grab specific records?
+\begin_inset LatexCommand label
+name "sub:How-to-grab"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The biopiece
+\series bold
+grab
+\series default
+ is related to the Unix grep and locates records based on matching keys
+ and/or values using either a pattern, a Perl regex, or a numerical evaluation.
+ To easily
+\series bold
+grab
+\series default
+ all records in the stream that have any mention of the pattern 'human'
+ just pipe the data stream through
+\series bold
+grab
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human
+\end_layout
+
+\begin_layout Standard
+This will search for the pattern 'human' in all keys and all values.
+ The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern switch takes a comma separated list of patterns, so in order to
+ match multiple patterns do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human,mouse
+\end_layout
+
+\begin_layout Standard
+It is also possible to use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in switch instead of
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern.
+
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in is used to read a file with one pattern per line:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern_in=patterns.txt
+\end_layout
+
+\begin_layout Standard
+If you want the opposite result --- to find all records that do not match
+ the patterns, add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+invert switch, which not only works with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern switch, but also with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+regex and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+eval:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human --invert
+\end_layout
+
+\begin_layout Standard
+If you want to search the record keys only,
+\emph on
+e.g.
+
+\emph default
+ to find all records containing the key SEQ you can add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys_only switch.
+ This will prevent matching of SEQ in any record value; since SEQ is in fact
+ a not uncommon peptide sequence, you could otherwise get an unwanted record.
+ Also, this will give an increase in speed since only the keys are searched:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=SEQ --keys_only
+\end_layout
+
+\begin_layout Standard
+However, if you are interested in finding the peptide sequence SEQ and not
+ the SEQ key, just add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+vals_only switch instead:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=SEQ --vals_only
+\end_layout
+
+\begin_layout Standard
+Also, if you want to grab for certain key/value pairs you can supply a comma
+ separated list of keys whose values will then be searched using the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys switch.
+ This is handy if your records contain large genomic sequences and you don't
+ want to search the entire sequence for
+\emph on
+e.g.
+
+\emph default
+ the organism name --- it is much faster to tell
+\series bold
+grab
+\series default
+ which keys to search the value for:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human --keys=SEQ_NAME
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout Standard
+It is also possible to invoke flexible matching using regex (regular expressions
+) instead of simple pattern matching.
+ In
+\series bold
+grab
+\series default
+ the regex engine is Perl based and allows use of different types of wild
+ cards, alternatives,
+\emph on
+etc
+\emph default
+
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://perldoc.perl.org/perlreref.html"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+.
+ If you want to
+\series bold
+grab
+\series default
+ records with the sequence ATCG or GCTA you can do this:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --regex='ATCG|GCTA'
+\end_layout
+
+\begin_layout Standard
+Or if you want to find sequences beginning with ATCG:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --regex='^ATCG'
+\end_layout
+
+\begin_layout Standard
+You can also use
+\series bold
+grab
+\series default
+ to locate records that fulfill a numerical property using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+eval switch which takes an expression in three parts.
+ The first part is the key that holds the value we want to evaluate, the
+ second part holds one of the eight operators:
+\end_layout
+
+\begin_layout Enumerate
+Greater than: >
+\end_layout
+
+\begin_layout Enumerate
+Greater than or equal to: >=
+\end_layout
+
+\begin_layout Enumerate
+Less than: <
+\end_layout
+
+\begin_layout Enumerate
+Less than or equal to: <=
+\end_layout
+
+\begin_layout Enumerate
+Equal to: =
+\end_layout
+
+\begin_layout Enumerate
+Not equal to: !=
+\end_layout
+
+\begin_layout Enumerate
+String wise equal to: eq
+\end_layout
+
+\begin_layout Enumerate
+String wise not equal to: ne
+\end_layout
+
+\begin_layout Standard
+And finally comes the number used in the evaluation.
+ So to
+\series bold
+grab
+\series default
+ all records with a sequence length greater than 30:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | length_seq | grab --eval='SEQ_LEN > 30'
+\end_layout
+
+\begin_layout Standard
+If you want to locate all records containing the pattern 'human' and where
+ the sequence length is greater than 30, you do this by running the stream
+ through
+\series bold
+grab
+\series default
+ twice:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern='human' | length_seq | grab --eval='SEQ_LEN > 30'
+\end_layout
+
+\begin_layout Standard
+Finally, it is possible to do fast matching of expressions from a file using
+ the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+exact switch.
+ Each of these expressions has to be matched exactly over the entire length,
+ which is useful if you have a file with accession numbers, that you want
+ to locate in the stream:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --exact acc_no.txt | ...
+\end_layout
+
+\begin_layout Standard
+Using
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+exact is much faster than using
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in, because with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+exact the expressions have to be complete matches, whereas 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in looks for subpatterns.
+\end_layout
+
+\begin_layout Standard
+NB! To get the best speed performance, use the most restrictive
+\series bold
+grab
+\series default
+ first.
+\end_layout
+
+\begin_layout Subsection
+How to remove keys from records?
+\end_layout
+
+\begin_layout Standard
+To remove one or more specific keys from all records in the data stream
+ use
+\series bold
+remove_keys
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | remove_keys --keys=SEQ,SEQ_NAME
+\end_layout
+
+\begin_layout Standard
+In the above example SEQ and SEQ_NAME will be removed from all records if
+ they exist in these.
+ If all keys are removed from a record, then the record will be removed.
+\end_layout
+
+\begin_layout Subsection
+How to rename keys in records?
+\end_layout
+
+\begin_layout Standard
+Sometimes you want to rename a record key,
+\emph on
+e.g.
+
+\emph default
+ if you have read in a two column table with sequence name and sequence
+ in each column (see
+\begin_inset LatexCommand ref
+reference "sub:How-to-read-table"
+
+\end_inset
+
+) without specifying the key names, then the sequence name will be called
+ V0 and the sequence V1 as default in the
+\series bold
+read_tab
+\series default
+ biopiece.
+ To rename the V0 and V1 keys we need to run the stream through
+\series bold
+rename_keys
+\series default
+ twice (one for each key to rename):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | rename_keys --keys=V0,SEQ_NAME | rename_keys --keys=V1,SEQ
+\end_layout
+
+\begin_layout Standard
+The first instance of
+\series bold
+rename_keys
+\series default
+ replaces all the V0 keys with SEQ_NAME, and the second instance of
+\series bold
+rename_keys
+\series default
+ replaces all the V1 keys with SEQ.
+
+\emph on
+Et voila
+\emph default
+ the data can now be used in the biopieces that require these keys.
+\end_layout
+
+\begin_layout Section
+Manipulating Sequences
+\end_layout
+
+\begin_layout Subsection
+How to get sequence lengths?
+\end_layout
+
+\begin_layout Standard
+The length for sequences in records can be determined with
+\series bold
+length_seq
+\series default
+, which adds the key SEQ_LEN to each record with the sequence length as
+ the value.
+ It also generates an extra record that is emitted last with the key TOTAL_SEQ_L
+EN showing the total length of all the sequences.
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_seq
+\end_layout
+
+\begin_layout Standard
+It is also possible to determine the sequence length using the generic tool
+
+\series bold
+length_vals
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-get-value_length"
+
+\end_inset
+
+, which determines the length of the values for a given list of keys:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_vals --keys=SEQ
+\end_layout
+
+\begin_layout Standard
+To obtain the total length of all sequences use
+\series bold
+sum_vals
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_vals --keys=SEQ
+\end_layout
+
+\begin_layout LyX-Code
+| sum_vals --keys=SEQ_LEN
+\end_layout
+
+\begin_layout Standard
+The biopiece
+\series bold
+analyze_seq
+\series default
+ will also determine the length of each sequence (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-analyze"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to analyze sequence composition?
+\begin_inset LatexCommand label
+name "sub:How-to-analyze"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+If you want to find out the sequence type, composition, length, as well
+ as GC content, indel content and proportions of soft and hard masked sequence,
+ then use
+\series bold
+analyze_seq
+\series default
+.
+ This handy biopiece will determine all these things per sequence from which
+ it is easy to get an overview using the
+\series bold
+write_tab
+\series default
+ biopiece to output a table (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-tab"
+
+\end_inset
+
+).
+ So in order to determine the sequence composition of a FASTA file with
+ just one entry containing the sequence 'ATCG' we just read the data with
+
+\series bold
+read_fasta
+\series default
+ and run the output through
+\series bold
+analyze_seq
+\series default
+ which will add the analysis to the record like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq ...
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:D: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+MIX_INDEX: 0.55
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:W: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:G: 16
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SOFT_MASK%: 63.75
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:B: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:V: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+HARD_MASK%: 0.00
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:H: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:S: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:N: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:.: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+GC%: 35.00
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:A: 8
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:Y: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:M: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:T: 44
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SEQ_TYPE: DNA
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:K: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:~: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SEQ: TTTCAGTTTGGGACGGAGTAAGGCCTTCCtttttttttttttttttttttttttttttgagaccgagtcttgctc
+tgtcg
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SEQ_LEN: 80
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:R: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:C: 12
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:-: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:U: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+---
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout Standard
+Now to make a table of how many As, Ts, Cs, and Gs you can add the following:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | analyze_seq | write_tab --keys=RES:A,RES:T,RES:C,RES:G
+\end_layout
+
+\begin_layout Standard
+Or if you want to see the proportions of hard and soft masked sequence:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | analyze_seq | write_tab --keys=HARD_MASK%,SOFT_MASK%
+\end_layout
+
+\begin_layout Standard
+If you have a stack of sequences in one file and you want to determine the
+ mean GC content you can do it using the
+\series bold
+mean_vals
+\series default
+ biopiece:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq | mean_vals --keys=GC%
+\end_layout
+
+\begin_layout Standard
+Or if you want the total count of Ns you can use
+\series bold
+sum_vals
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq | sum_vals --keys=RES:N
+\end_layout
+
+\begin_layout Standard
+The MIX_INDEX key is calculated as the count of the most common residue
+ over the sequence length, and can be used as a cut-off for removing sequence
+ tags consisting of mostly one nucleotide:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq | grab --eval='MIX_INDEX<0.85'
+\end_layout
+
+\begin_layout Subsection
+How to extract subsequences?
+\begin_inset LatexCommand label
+name "sub:How-to-extract"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+In order to extract a subsequence from a longer sequence use the biopiece
+ extract_seq, which will replace the sequence in the record with the subsequence
+ (this behaviour should probably be modified to be dependent on a 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+replace or a
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_replace switch
+\begin_inset Note Note
+status collapsed
+
+\begin_layout Standard
+also in split_seq
+\end_layout
+
+\end_inset
+
+).
+ So to extract the first 20 residues from all sequences do (first residue
+ is designated 1):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | extract_seq --beg=1 --len=20
+\end_layout
+
+\begin_layout Standard
+You can also specify a begin and end coordinate set:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | extract_seq --beg=20 --end=40
+\end_layout
+
+\begin_layout Standard
+If you want the subsequences from position 20 to the sequence end do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | extract_seq --beg=20
+\end_layout
+
+\begin_layout Standard
+If you want to extract subsequences a given distance from the sequence end
+ you can do this by reversing the sequence with the biopiece
+\series bold
+reverse_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-reverse-seq"
+
+\end_inset
+
+, followed by
+\series bold
+extract_seq
+\series default
+ to get the subsequence, and then
+\series bold
+reverse_seq
+\series default
+ again to get the subsequence back in the original orientation:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | reverse_seq
+\end_layout
+
+\begin_layout LyX-Code
+| extract_seq --beg=10 --len=10 | reverse_seq
+\end_layout
+
+\begin_layout Subsection
+How to get genomic sequence?
+\begin_inset LatexCommand label
+name "sub:How-to-get-genomic-sequence"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The biopiece
+\series bold
+get_genome_seq
+\series default
+ can extract subsequences for a given genome specified with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+genome switch explicitly using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+beg and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+end/
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+len switches:
+\end_layout
+
+\begin_layout LyX-Code
+get_genome_seq --genome=<genome> --beg=1 --len=100
+\end_layout
+
+\begin_layout Standard
+Alternatively,
+\series bold
+get_genome_seq
+\series default
+ can be used to append the corresponding sequence to BED, PSL, and BLAST
+ records:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<BED file> | get_genome_seq --genome=<genome>
+\end_layout
+
+\begin_layout Standard
+It is also possible to include flanking sequence using the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+flank switch.
+ So to include 50 nucleotides upstream and 50 nucleotides downstream for
+ each BED entry do:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<BED file> | get_genome_seq --genome=<genome> --flank=50
+\end_layout
+
+\begin_layout Subsection
+How to upper-case sequences?
+\end_layout
+
+\begin_layout Standard
+Sequences can be shifted from lower case to upper case using
+\series bold
+uppercase_seq
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | uppercase_seq
+\end_layout
+
+\begin_layout Subsection
+How to reverse sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-reverse-seq"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The order of residues in a sequence can be reversed using reverse_seq:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | reverse_seq
+\end_layout
+
+\begin_layout Standard
+Note that in order to reverse/complement a sequence you also need the
+\series bold
+complement_seq
+\series default
+ biopiece (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-complement"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to complement sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-complement"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+DNA and RNA sequences can be complemented with
+\series bold
+complement_seq
+\series default
+, which automagically determines the sequence type:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | complement_seq
+\end_layout
+
+\begin_layout Standard
+Note that in order to reverse/complement a sequence you also need the
+\series bold
+reverse_seq
+\series default
+ biopiece (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-reverse-seq"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to remove indels from sequences?
+\end_layout
+
+\begin_layout Standard
+Indels can be removed from sequences with the
+\series bold
+remove_indels
+\series default
+ biopiece.
+ This is useful if you have aligned some sequences (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-align"
+
+\end_inset
+
+) and extracted (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-extract"
+
+\end_inset
+
+) a block of subsequences from the alignment and you want to use these sequences
+ in a search where you need to remove the indels first.
+ '-', '~', and '.' are considered indels:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | remove_indels
+\end_layout
+
+\begin_layout Subsection
+How to shuffle sequences?
+\end_layout
+
+\begin_layout Standard
+All residues in sequences in the stream can be shuffled to random positions
+ using the
+\series bold
+shuffle_seq
+\series default
+ biopiece:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | shuffle_seq
+\end_layout
+
+\begin_layout Subsection
+How to split sequences into overlapping subsequences?
+\end_layout
+
+\begin_layout Standard
+Sequences can be split into overlapping subsequences with the 
+\series bold
+split_seq
+\series default
+ biopiece.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | split_seq --word_size=20 --uniq
+\end_layout
+
+\begin_layout Subsection
+How to determine the oligo frequency?
+\end_layout
+
+\begin_layout Standard
+In order to determine if any oligo usage is over represented in one or more
+ sequences you can determine the frequency of oligos of a given size with
+
+\series bold
+oligo_freq
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | oligo_freq --word_size=4
+\end_layout
+
+\begin_layout Standard
+And if you have more than one sequence and want to accumulate the frequencies
+ you need the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+all switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | oligo_freq --word_size=4 --all
+\end_layout
+
+\begin_layout Standard
+To get a meaningful result you need to write the resulting frequencies as
+ a table with
+\series bold
+write_tab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-tab"
+
+\end_inset
+
+), but first it is important to
+\series bold
+grab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-grab"
+
+\end_inset
+
+) the records with the frequencies to avoid full length sequences in the
+ table:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | oligo_freq --word_size=4 --all | grab --pattern=OLIGO --keys_only
+\end_layout
+
+\begin_layout LyX-Code
+| write_tab --no_stream
+\end_layout
+
+\begin_layout Standard
+And the resulting frequency table can be sorted with Unix sort (man sort).
+\end_layout
+
+\begin_layout Subsection
+How to search for sequences in genomes?
+\end_layout
+
+\begin_layout Standard
+See the following biopieces:
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+patscan_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-patscan"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+blat_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAT"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+blast_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAST"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+vmatch_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-Vmatch"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to search sequences for a pattern?
+\begin_inset LatexCommand label
+name "sub:How-to-use-patscan"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+It is possible to search sequences in the data stream for patterns using
+ the
+\series bold
+patscan_seq
+\series default
+ biopiece which utilizes the powerful scan_for_matches engine.
+ Consult the documentation for scan_for_matches in order to learn how to
+ define patterns (the documentation is included in Appendix\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sec:scan_for_matches-README"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Standard
+To search all sequences for a simple pattern consisting of the sequence
+ ATCGATCG allowing for 3 mismatches, 2 insertions and 1 deletion:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | patscan_seq --pattern='ATCGATCG[3,2,1]'
+\end_layout
+
+\begin_layout Standard
+The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern switch takes a comma separated list of patterns, so if you want
+ to search for more than one pattern do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern='ATCGATCG[3,2,1],GCTAGCTA[3,2,1]'
+\end_layout
+
+\begin_layout Standard
+It is also possible to have a list of different patterns to search for in
+ a file with one pattern per line.
+ In order to get
+\series bold
+patscan_seq
+\series default
+ to read these patterns use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern_in=<file>
+\end_layout
+
+\begin_layout Standard
+To also scan the complementary strand in nucleotide sequences (
+\series bold
+patscan_seq
+\series default
+ automagically determines the sequence type) you need to add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+comp switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern=<pattern> --comp
+\end_layout
+
+\begin_layout Standard
+It is also possible to use
+\series bold
+patscan_seq
+\series default
+ to output those records that do not contain a certain pattern by using
+ the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+invert switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern=<pattern> --invert
+\end_layout
+
+\begin_layout Standard
+Finally,
+\series bold
+patscan_seq
+\series default
+ can also scan for patterns in a given genome sequence, instead of sequences
+ in the stream, using the
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+genome switch:
+\end_layout
+
+\begin_layout LyX-Code
+patscan_seq --pattern=<pattern> --genome=<genome>
+\end_layout
+
+\begin_layout Subsection
+How to use BLAT for sequence search?
+\begin_inset LatexCommand label
+name "sub:How-to-use-BLAT"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Sequences in the data stream can be matched against supported genomes using
+
+\series bold
+blat_seq
+\series default
+ which is a biopiece using BLAT as the name might suggest.
+ Currently only Mouse and Human genomes are available and it is not possible
+ to use OOC files since there is still a need for a local repository for
+ genome files.
+ Otherwise it is just:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | blat_seq --genome=<genome>
+\end_layout
+
+\begin_layout Standard
+The search results can then be written to file with
+\series bold
+write_psl
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-PSL"
+
+\end_inset
+
+) or
+\series bold
+write_bed
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-BED"
+
+\end_inset
+
+) (although with 
+\series bold
+write_bed
+\series default
+ some information will be lost).
+ It is also possible to plot chromosome distribution of the search results
+ using
+\series bold
+plot_chrdist
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-plot-chrdist"
+
+\end_inset
+
+) or the distribution of the match lengths using
+\series bold
+plot_lendist
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-plot-lendist"
+
+\end_inset
+
+) or a karyogram with the hits using
+\series bold
+plot_karyogram
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-plot-karyogram"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to use BLAST for sequence search?
+\begin_inset LatexCommand label
+name "sub:How-to-use-BLAST"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Two biopieces exist for blasting sequences:
+\series bold
+create_blast_db
+\series default
+ is used to create the BLAST database required for BLAST which is queried
+ using the biopiece
+\series bold
+blast_seq
+\series default
+.
+ So in order to create a BLAST database from sequences in the data stream
+ you simply run:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_blast_db --database=my_database --no_stream
+\end_layout
+
+\begin_layout Standard
+The type of sequence to use for the database is automagically determined
+ by
+\series bold
+create_blast_db
+\series default
+, but don't have a mixture of peptide and nucleic acid sequences in the
+ stream.
+ The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+database switch takes a path as argument, but will default to 'blastdb_<time_sta
+mp>' if not set.
+\end_layout
+
+\begin_layout Standard
+The resulting database can now be queried with sequences in another data
+ stream using
+\series bold
+blast_seq
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | blast_seq --database=my_database
+\end_layout
+
+\begin_layout Standard
+Again, the sequence type is determined automagically and the appropriate
+ BLAST program is guessed (see table below), however, the program name can
+ be overruled with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+program switch.
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<row bottomline="true">
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Subject sequence
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Query sequence
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Program guess
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+blastn
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+blastp
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+blastx
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+tblastn
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Finally, it is also possible to use
+\series bold
+blast_seq
+\series default
+ for blasting sequences against a preformatted genome using the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+genome switch instead of the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+database switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | blast_seq --genome=<genome>
+\end_layout
+
+\begin_layout Subsection
+How to use Vmatch for sequence search?
+\begin_inset LatexCommand label
+name "sub:How-to-use-Vmatch"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The powerful suffix array software package Vmatch
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://www.vmatch.de/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+ can be used for exact mapping of sequences against indexed genomes using
+ the biopiece
+\series bold
+vmatch_seq
+\series default
+, which will e.g.
+ map 700000 ESTs to the human genome locating all 160 million hits in less than
+ an hour.
+ Only nucleotide sequences and sequences longer than 11 nucleotides will
+ be mapped.
+ It is recommended that sequences consisting of mostly one nucleotide type
+ are removed.
+ This can be done with the
+\series bold
+analyze_seq
+\series default
+ biopiece
+\begin_inset LatexCommand eqref
+reference "sub:How-to-analyze"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome>
+\end_layout
+
+\begin_layout Standard
+It is also possible to allow for mismatches using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+hamming_dist switch.
+ So to allow for 2 mismatches:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --hamming_dist=2
+\end_layout
+
+\begin_layout Standard
+Or to allow for 10% mismatching nucleotides:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --hamming_dist=10p
+\end_layout
+
+\begin_layout Standard
+To allow both indels and mismatches use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+edit_dist switch.
+ So to allow for one mismatch or one indel:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --edit_dist=1
+\end_layout
+
+\begin_layout Standard
+Or to allow for 5% indels or mismatches:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --edit_dist=5p
+\end_layout
+
+\begin_layout Standard
+Note that using
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+hamming_dist or
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+edit_dist slows down vmatch considerably --- use with care.
+\end_layout
+
+\begin_layout Standard
+The resulting SCORE key can be replaced to hold the number of genome matches
+ of a given sequence (multi-mappers) if the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+count switch is given.
+\end_layout
+
+\begin_layout Subsection
+How to find all matches between sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-find-matches"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+All matches between two sequences can be determined with the biopiece
+\series bold
+match_seq
+\series default
+.
+ The match finding engine underneath the hood of
+\series bold
+match_seq
+\series default
+ is the super fast suffix tree program MUMmer
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://mummer.sourceforge.net/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+, which will locate all forward and reverse matches between huge sequences
+ in a matter of minutes (if the repeat count is not too high and if the
+ word size used is appropriate).
+ Matching two
+\emph on
+Helicobacter pylori
+\emph default
+ genomes (1.7Mbp) takes around 10 seconds:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | match_seq --word_size=20 --direction=both
+\end_layout
+
+\begin_layout Standard
+The output from
+\series bold
+match_seq
+\series default
+ can be used to generate a dot plot with
+\series bold
+plot_matches
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-generate-dotplot"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to align sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-align"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Sequences in the stream can be aligned with the
+\series bold
+align_seq
+\series default
+ biopiece that uses Muscle
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://www.drive5.com/muscle/muscle.html"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+ as alignment engine.
+ Currently you cannot change any of the Muscle alignment parameters and
+
+\series bold
+align_seq
+\series default
+ will create an alignment based on the defaults (which are really good!):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | align_seq
+\end_layout
+
+\begin_layout Standard
+The aligned output can be written to file in FASTA format using
+\series bold
+write_fasta
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-fasta"
+
+\end_inset
+
+) or in pretty text using
+\series bold
+write_align
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-alignment"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to create a weight matrix?
+\end_layout
+
+\begin_layout Standard
+If you want a weight matrix to show the sequence composition of a stack
+ of sequences you can use the biopiece create_weight_matrix:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_weight_matrix
+\end_layout
+
+\begin_layout Standard
+The result can be output in percent using the
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+percent switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_weight_matrix --percent
+\end_layout
+
+\begin_layout Standard
+The weight matrix can be written as tabular output with
+\series bold
+write_tab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-tab"
+
+\end_inset
+
+) after removing the records containing SEQ with 
+\series bold
+grab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-grab"
+
+\end_inset
+
+):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_weight_matrix | grab --invert --keys=SEQ --keys_only
+\end_layout
+
+\begin_layout LyX-Code
+| write_tab --no_stream
+\end_layout
+
+\begin_layout Standard
+The V0 column will hold the residue, while the rest of the columns will
+ hold the frequencies for each sequence position.
+\end_layout
+
+\begin_layout Section
+Plotting
+\end_layout
+
+\begin_layout Standard
+There exist several biopieces for plotting.
+ Some of these are based on GNUplot
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://www.gnuplot.info/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+, which is an extremely powerful platform to generate all sorts of plots
+ and even though GNUplot has quite a steep learning curve, the biopieces
+ utilizing GNUplot are simple to use.
+ GNUplot is able to output a lot of different formats (called terminals
+ in GNUplot), but the biopieces focus on three formats only:
+\end_layout
+
+\begin_layout Enumerate
+The 'dumb' terminal is the default for the GNUplot based biopieces and will output
+ a plot in crude ASCII text (Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Dumb-terminal"
+
+\end_inset
+
+).
+ This is quite nice for a quick and dirty plot to get an overview of your
+ data.
+\end_layout
+
+\begin_layout Enumerate
+The 'post' or 'postscript' terminal outputs postscript code which is publication
+ grade graphics that can be viewed with applications such as Ghostview,
+ Photoshop, and Preview.
+\end_layout
+
+\begin_layout Enumerate
+The 'svg' terminal outputs scalable vector graphics (SVG) which is a vector
+ based format.
+ SVG is great because you can edit the resulting plot using Photoshop or
+ Inkscape
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+Inkscape is a really handy drawing program that is free and open source.
+ Available at 
+\begin_inset LatexCommand htmlurl
+target "http://www.inkscape.org"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+ if you want to add additional labels, captions, arrows, and so on and then
+ save the result in different formats, such as postscript without losing
+ resolution.
+\end_layout
+
+\begin_layout Standard
+The biopieces for plotting that are not based on GNUplot only output SVG
+ (that may change in the future).
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename lendist_ascii.png
+ lyxscale 70
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Dumb-terminal"
+
+\end_inset
+
+Dumb terminal
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+The output of a length distribution plot in the default 'dumb terminal'
+ to the terminal window.
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a histogram?
+\begin_inset LatexCommand label
+name "How-to-plot-histogram"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+A generic histogram for a given value can be plotted with the biopiece
+\series bold
+plot_histogram
+\series default
+ (Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Histogram"
+
+\end_inset
+
+):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_histogram --key=TISSUE --no_stream
+\end_layout
+
+\begin_layout Standard
+(Figure missing)
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align left
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename histogram.png
+ lyxscale 70
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Histogram"
+
+\end_inset
+
+Histogram
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a length distribution?
+\begin_inset LatexCommand label
+name "sub:How-to-plot-lendist"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Plotting of length distributions, whether sequence lengths, pattern lengths,
+ hit lengths,
+\emph on
+etc.
+
+\emph default
+ is a really handy thing and can be done with the biopiece 
+\series bold
+plot_lendist
+\series default
+.
+ If you have a file with FASTA entries and want to plot the length distribution
+ you do it like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_seq
+\end_layout
+
+\begin_layout LyX-Code
+| plot_lendist --key=SEQ_LEN --no_stream
+\end_layout
+
+\begin_layout Standard
+The result will be written to the default dumb terminal and will look like
+ Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Dumb-terminal"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+If you instead want the result in postscript format you can do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_lendist --key=SEQ_LEN --terminal=post --result_out=file.ps
+\end_layout
+
+\begin_layout Standard
+That will generate the plot and save it to file, but not interrupt the data
+ stream which can then be used in further analysis.
+ You can also save the plot implicitly using '>', however, it is then important
+ to terminate the stream with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_lendist --key=SEQ_LEN --terminal=post --no_stream > file.ps
+\end_layout
+
+\begin_layout Standard
+The resulting plot can be seen in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Length-distribution"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename lendist.ps
+ lyxscale 50
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Length-distribution"
+
+\end_inset
+
+Length distribution
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+Length distribution of 630 piRNA like RNAs.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a chromosome distribution?
+\begin_inset LatexCommand label
+name "sub:How-to-plot-chrdist"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+If you have the result of a sequence search against a multi chromosome genome,
+ it is very practical to be able to plot the distribution of search hits
+ on the different chromosomes.
+ This can be done with
+\series bold
+plot_chrdist
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | blat_genome | plot_chrdist --no_stream
+\end_layout
+
+\begin_layout Standard
+The above example will result in a crude plot using the 'dumb' terminal,
+ and if you want to mess around with the results from the BLAT search you
+ probably want to save the result to file first (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-PSL"
+
+\end_inset
+
+).
+ To plot the chromosome distribution from the saved search result you can
+ do:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=file.bed | plot_chrdist --terminal=post --result_out=plot.ps
+\end_layout
+
+\begin_layout Standard
+That will result in the output shown in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Chromosome-distribution"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename chrdist.ps
+ lyxscale 50
+ width 12cm
+ rotateAngle 90
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Chromosome-distribution"
+
+\end_inset
+
+Chromosome distribution
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to generate a dotplot?
+\begin_inset LatexCommand label
+name "sub:How-to-generate-dotplot"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+A dotplot is a powerful way to get an overview of the size and location
+ of sequence insertions, deletions, and duplications between two sequences.
+ Generating a dotplot with biopieces is a two step process where you initially
+ find all matches between two sequences using the tool
+\series bold
+match_seq
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-find-matches"
+
+\end_inset
+
+) and plot the resulting matches with
+\series bold
+plot_matches
+\series default
+.
+ Matching and plotting two
+\emph on
+Helicobacter pylori
+\emph default
+ genomes (1.7Mbp) takes around 10 seconds:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | match_seq | plot_matches --terminal=post --result_out=plot.ps
+\end_layout
+
+\begin_layout Standard
+The resulting dotplot is in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Dotplot"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename dotplot.ps
+ lyxscale 50
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Dotplot"
+
+\end_inset
+
+Dotplot
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+Forward matches are displayed in green while reverse matches are displayed
+ in red.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a sequence logo?
+\end_layout
+
+\begin_layout Standard
+Sequence logos can be generated with 
+\series bold
+plot_seqlogo
+\series default
+.
+ The sequence type is determined automagically and an entropy scale of 2
+ bits and 4 bits is used for nucleotide and peptide sequences, respectively
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand htmlurl
+target "http://www.ccrnp.ncifcrf.gov/~toms/paper/hawaii/latex/node5.html"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_seqlogo --no_stream --result_out=seqlogo.svg
+\end_layout
+
+\begin_layout Standard
+An example of a sequence logo can be seen in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Sequence-logo"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename seqlogo.png
+ lyxscale 50
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Sequence-logo"
+
+\end_inset
+
+Sequence logo
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a karyogram?
+\begin_inset LatexCommand label
+name "sub:How-to-plot-karyogram"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+To plot search hits on genomes use
+\series bold
+plot_karyogram
+\series default
+, which will output a nice karyogram in SVG graphics:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_karyogram --result_out=karyogram.svg
+\end_layout
+
+\begin_layout Standard
+The banding data is taken from the UCSC genome browser database and currently
+ only Human and Mouse are supported.
+ Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Karyogram"
+
+\end_inset
+
+ shows the distribution of piRNA like RNAs matched to the Human genome.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename karyogram.png
+ lyxscale 35
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Karyogram"
+
+\end_inset
+
+Karyogram
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+Hits from a search of piRNA like RNAs in the Human genome are displayed as
+ short horizontal bars.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+Uploading Results
+\end_layout
+
+\begin_layout Subsection
+How do I display my results in the UCSC Genome Browser?
+\end_layout
+
+\begin_layout Standard
+Results from the list of biopieces below can be uploaded directly to a local
+ mirror of the UCSC Genome Browser using the biopiece
+\series bold
+upload_to_ucsc
+\series default
+:
+\end_layout
+
+\begin_layout Itemize
+patscan_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-patscan"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+blat_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAT"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+blast_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAST"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+vmatch_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-Vmatch"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The simplest syntax for uploading data requires two mandatory
+ switches:
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+database, which is the UCSC database name (such as hg18, mm9, etc.) and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+table which should be the user's initials followed by an underscore and a
+ short description of the data:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | upload_to_ucsc --database=hg18 --table=mah_snoRNAs
+\end_layout
+
+\begin_layout Standard
+The
+\series bold
+upload_to_ucsc
+\series default
+ biopiece modifies the user's ~/ucsc/my_tracks.ra file automagically (a backup
+ is created with the name ~/ucsc/my_tracks.ra~) with default values that
+ can be overridden using the following switches:
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+short_label - Short label for track - Default=database->table
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+long_label - Long label for track - Default=database->table
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+group - Track group name - Default=<user name as defined in env>
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+priority - Track display priority - Default=1
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+color - Track color - Default=147,73,42
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+chunk_size - Chunks for loading - Default=10000000
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+visibility - Track visibility - Default=pack
+\end_layout
+
+\begin_layout Standard
+Also, data in BED or PSL format can be uploaded with
+\series bold
+upload_to_ucsc
+\series default
+ as long as these refer to genomes and chromosomes existing in the UCSC
+ Genome Browser:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<bed file> | upload_to_ucsc ...
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+read_psl --data_in=<psl file> | upload_to_ucsc ...
+\end_layout
+
+\begin_layout Section
+Power Scripting
+\end_layout
+
+\begin_layout Standard
+It is possible to do commandline scripting of biopiece records using Perl.
+ Because a biopiece record essentially is a hash structure, you can pass
+ records to
+\series bold
+bioscript
+\series default
+ command, which is a wrapper around the Perl executable that allows direct
+ manipulations of the records using the power of Perl.
+\end_layout
+
+\begin_layout Standard
+In the example below we replace the value of the CHR key in all records
+ with a running number:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | bioscript 'while($r=get_record(
+\backslash
+*STDIN)){$r->{CHR}=$i++; put_record($r)}'
+\end_layout
+
+\begin_layout Standard
+Something more useful would probably be to create custom FASTA headers.
+ E.g.
+ if we read in a BED file, lookup the genomic sequence, create a custom
+ FASTA header with
+\series bold
+bioscript
+\series default
+ and output FASTA entries:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | bioscript 'while($r=get_record(
+\backslash
+*STDIN)){$r->{SEQ_NAME}= //
+\end_layout
+
+\begin_layout LyX-Code
+join("_",$r->{CHR},$r->{CHR_BEG},$r->{CHR_END}); put_record($r)}'
+\end_layout
+
+\begin_layout Standard
+And the output:
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_21567527_21567550
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_693380_693403
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_13859534_13859557
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_9005090_9005113
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_2106825_2106848
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_14649031_14649054
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout Section
+Troubleshooting
+\end_layout
+
+\begin_layout Standard
+Shoot the messenger!
+\end_layout
+
+\begin_layout Section
+\start_of_appendix
+Keys
+\begin_inset LatexCommand label
+name "sec:Keys"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+HIT
+\end_layout
+
+\begin_layout Standard
+HIT_BEG
+\end_layout
+
+\begin_layout Standard
+HIT_END
+\end_layout
+
+\begin_layout Standard
+HIT_LEN
+\end_layout
+
+\begin_layout Standard
+HIT_NAME
+\end_layout
+
+\begin_layout Standard
+PATTERN
+\end_layout
+
+\begin_layout Section
+Switches
+\begin_inset LatexCommand label
+name "sec:Switches"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_in
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_out
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+data_in
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num
+\end_layout
+
+\begin_layout Section
+scan_for_matches README
+\begin_inset LatexCommand label
+name "sec:scan_for_matches-README"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches:
+\end_layout
+
+\begin_layout LyX-Code
+ A Program to Scan Nucleotide or Protein Sequences for Matching Patterns
+\end_layout
+
+\begin_layout LyX-Code
+ Ross Overbeek
+\end_layout
+
+\begin_layout LyX-Code
+ MCS
+\end_layout
+
+\begin_layout LyX-Code
+ Argonne National Laboratory
+\end_layout
+
+\begin_layout LyX-Code
+ Argonne, IL 60439
+\end_layout
+
+\begin_layout LyX-Code
+ USA
+\end_layout
+
+\begin_layout LyX-Code
+Scan_for_matches is a utility that we have written to search for
+\end_layout
+
+\begin_layout LyX-Code
+patterns in DNA and protein sequences.
+ I wrote most of the code,
+\end_layout
+
+\begin_layout LyX-Code
+although David Joerg and Morgan Price wrote sections of an
+\end_layout
+
+\begin_layout LyX-Code
+earlier version.
+ The whole notion of pattern matching has a rich
+\end_layout
+
+\begin_layout LyX-Code
+history, and we borrowed liberally from many sources.
+ However, it is
+\end_layout
+
+\begin_layout LyX-Code
+worth noting that we were strongly influenced by the elegant tools
+\end_layout
+
+\begin_layout LyX-Code
+developed and distributed by David Searls.
+ My intent is to make the
+\end_layout
+
+\begin_layout LyX-Code
+existing tool available to anyone in the research community that might
+\end_layout
+
+\begin_layout LyX-Code
+find it useful.
+ I will continue to try to fix bugs and make suggested
+\end_layout
+
+\begin_layout LyX-Code
+enhancements, at least until I feel that a superior tool exists.
+\end_layout
+
+\begin_layout LyX-Code
+Hence, I would appreciate it if all bug reports and suggestions are
+\end_layout
+
+\begin_layout LyX-Code
+directed to me at Overbeek@mcs.anl.gov.
+
+\end_layout
+
+\begin_layout LyX-Code
+I will try to log all bug fixes and report them to users that send me
+\end_layout
+
+\begin_layout LyX-Code
+their email addresses.
+ I do not require that you give me your name
+\end_layout
+
+\begin_layout LyX-Code
+and address.
+ However, if you do give it to me, I will try to notify
+\end_layout
+
+\begin_layout LyX-Code
+you of serious problems as they are discovered.
+\end_layout
+
+\begin_layout LyX-Code
+Getting Started:
+\end_layout
+
+\begin_layout LyX-Code
+ The distribution should contain at least the following programs:
+\end_layout
+
+\begin_layout LyX-Code
+ README - This document
+\end_layout
+
+\begin_layout LyX-Code
+ ggpunit.c - One of the two source files
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches.c - The second source file
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ run_tests - A perl script to test things
+\end_layout
+
+\begin_layout LyX-Code
+ show_hits - A handy perl script
+\end_layout
+
+\begin_layout LyX-Code
+ test_dna_input - Test sequences for DNA
+\end_layout
+
+\begin_layout LyX-Code
+ test_dna_patterns - Test patterns for DNA scan
+\end_layout
+
+\begin_layout LyX-Code
+ test_output - Desired output from test
+\end_layout
+
+\begin_layout LyX-Code
+ test_prot_input - Test protein sequences
+\end_layout
+
+\begin_layout LyX-Code
+ test_prot_patterns - Test patterns for proteins
+\end_layout
+
+\begin_layout LyX-Code
+ testit - a perl script used for test
+\end_layout
+
+\begin_layout LyX-Code
+ Only the first three files are required.
+ The others are useful,
+\end_layout
+
+\begin_layout LyX-Code
+ but only if you have Perl installed on your system.
+ If you do
+\end_layout
+
+\begin_layout LyX-Code
+ have Perl, I suggest that you type
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ which perl
+\end_layout
+
+\begin_layout LyX-Code
+ to find out where it is installed.
+ On my system, I get the following
+\end_layout
+
+\begin_layout LyX-Code
+ response:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ clone% which perl
+\end_layout
+
+\begin_layout LyX-Code
+ /usr/local/bin/perl
+\end_layout
+
+\begin_layout LyX-Code
+ indicating that Perl is installed in /usr/local/bin.
+ Anyway, once
+\end_layout
+
+\begin_layout LyX-Code
+ you know where it is installed, edit the first line of files
+\end_layout
+
+\begin_layout LyX-Code
+ testit
+\end_layout
+
+\begin_layout LyX-Code
+ show_hits
+\end_layout
+
+\begin_layout LyX-Code
+ replacing /usr/local/bin/perl with the appropriate location.
+ I
+\end_layout
+
+\begin_layout LyX-Code
+ will assume that you can do this, although it is not critical (it
+\end_layout
+
+\begin_layout LyX-Code
+ is needed only to test the installation and to use the "show_hits"
+\end_layout
+
+\begin_layout LyX-Code
+ utility).
+ Perl is not required to actually install and run
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches.
+
+\end_layout
+
+\begin_layout LyX-Code
+ If you do not have Perl, I suggest you get it and install it (it
+\end_layout
+
+\begin_layout LyX-Code
+ is a wonderful utility).
+ Information about Perl and how to get it
+\end_layout
+
+\begin_layout LyX-Code
+ can be found in the book "Programming Perl" by Larry Wall and
+\end_layout
+
+\begin_layout LyX-Code
+ Randall L.
+ Schwartz, published by O'Reilly & Associates, Inc.
+\end_layout
+
+\begin_layout LyX-Code
+ To get started, you will need to compile the program.
+ I do this
+\end_layout
+
+\begin_layout LyX-Code
+ using
+\end_layout
+
+\begin_layout LyX-Code
+ gcc -O -o scan_for_matches ggpunit.c scan_for_matches.c
+\end_layout
+
+\begin_layout LyX-Code
+ If you do not use GNU C, use
+\end_layout
+
+\begin_layout LyX-Code
+ cc -O -DCC -o scan_for_matches ggpunit.c scan_for_matches.c
+\end_layout
+
+\begin_layout LyX-Code
+ which works on my Sun.
+
+\end_layout
+
+\begin_layout LyX-Code
+ Once you have compiled scan_for_matches, you can verify that it
+\end_layout
+
+\begin_layout LyX-Code
+ works with
+\end_layout
+
+\begin_layout LyX-Code
+ clone% run_tests tmp
+\end_layout
+
+\begin_layout LyX-Code
+ clone% diff tmp test_output
+\end_layout
+
+\begin_layout LyX-Code
+ You may get a few strange lines of the sort
+\end_layout
+
+\begin_layout LyX-Code
+ clone% run_tests tmp
+\end_layout
+
+\begin_layout LyX-Code
+ rm: tmp: No such file or directory
+\end_layout
+
+\begin_layout LyX-Code
+ clone% diff tmp test_output
+\end_layout
+
+\begin_layout LyX-Code
+ These should cause no concern.
+ However, if the "diff" shows that
+\end_layout
+
+\begin_layout LyX-Code
+ tmp and test_output are different, contact me (you have a
+\end_layout
+
+\begin_layout LyX-Code
+ problem).
+
+\end_layout
+
+\begin_layout LyX-Code
+ You should now be able to use scan_for_matches by following the
+\end_layout
+
+\begin_layout LyX-Code
+ instructions given below (which is all the normal user should have
+\end_layout
+
+\begin_layout LyX-Code
+ to understand, once things are installed properly).
+\end_layout
+
+\begin_layout LyX-Code
+ ==============================================================
+\end_layout
+
+\begin_layout LyX-Code
+How to run scan_for_matches:
+\end_layout
+
+\begin_layout LyX-Code
+ To run the program, you need to create two files
+\end_layout
+
+\begin_layout LyX-Code
+ 1.
+ the first file contains the pattern you wish to scan for; I'll
+\end_layout
+
+\begin_layout LyX-Code
+ call this file pat_file in what follows (but any name is ok)
+\end_layout
+
+\begin_layout LyX-Code
+ 2.
+ the second file contains a set of sequences to scan.
+ These
+\end_layout
+
+\begin_layout LyX-Code
+ should be in "fasta format".
+ Just look at the contents of
+\end_layout
+
+\begin_layout LyX-Code
+ test_dna_input to see examples of this format.
+ Basically,
+\end_layout
+
+\begin_layout LyX-Code
+ each sequence begins with a line of the form
+\end_layout
+
+\begin_layout LyX-Code
+ >sequence_id
+\end_layout
+
+\begin_layout LyX-Code
+ and is followed by one or more lines containing the sequence.
+\end_layout
+
+\begin_layout LyX-Code
+ Once these files have been created, you just use
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches pat_file < input_file
+\end_layout
+
+\begin_layout LyX-Code
+ to scan all of the input sequences for the given pattern.
+ As an
+\end_layout
+
+\begin_layout LyX-Code
+ example, suppose that pat_file contains a single line of the form
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...7 3...8 ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ Then,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ should produce two "hits".
+ When I run this on my machine, I get
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[6,27]
+\end_layout
+
+\begin_layout LyX-Code
+ cguaacc ggttaacc gguuacg
+\end_layout
+
+\begin_layout LyX-Code
+ >tst2:[6,27]
+\end_layout
+
+\begin_layout LyX-Code
+ CGUAACC GGTTAACC GGUUACG
+\end_layout
+
+\begin_layout LyX-Code
+ clone%
+\end_layout
+
+\begin_layout LyX-Code
+Simple Patterns Built by Matching Ranges and Reverse Complements
+\end_layout
+
+\begin_layout LyX-Code
+ Let me first explain this simple pattern:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...7 3...8 ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ The pattern consists of three "pattern units" separated by spaces.
+\end_layout
+
+\begin_layout LyX-Code
+ The first pattern unit is
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...7
+\end_layout
+
+\begin_layout LyX-Code
+ which means "match 4 to 7 characters and call them p1".
+ The
+\end_layout
+
+\begin_layout LyX-Code
+ second pattern unit is
+\end_layout
+
+\begin_layout LyX-Code
+ 3...8
+\end_layout
+
+\begin_layout LyX-Code
+ which means "then match 3 to 8 characters".
+ The last pattern unit
+\end_layout
+
+\begin_layout LyX-Code
+ is
+\end_layout
+
+\begin_layout LyX-Code
+ ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ which means "match the reverse complement of p1".
+ The first
+\end_layout
+
+\begin_layout LyX-Code
+ reported hit is shown as
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[6,27]
+\end_layout
+
+\begin_layout LyX-Code
+ cguaacc ggttaacc gguuacg
+\end_layout
+
+\begin_layout LyX-Code
+ which states that characters 6 through 27 of sequence tst1 were
+\end_layout
+
+\begin_layout LyX-Code
+ matched.
+ "cguaacc" matched the first pattern unit, "ggttaacc" the
+\end_layout
+
+\begin_layout LyX-Code
+ second, and "gguuacg" the third.
+ This is an example of a common
+\end_layout
+
+\begin_layout LyX-Code
+ type of pattern used to search for sections of DNA or RNA that
+\end_layout
+
+\begin_layout LyX-Code
+ would fold into a hairpin loop.
+\end_layout
+
+\begin_layout LyX-Code
+Searching Both Strands
+\end_layout
+
+\begin_layout LyX-Code
+ Now for a short aside: scan_for_matches only searched the
+\end_layout
+
+\begin_layout LyX-Code
+ sequences in the input file; it did not search the opposite
+\end_layout
+
+\begin_layout LyX-Code
+ strand.
+ With a pattern of the sort we just used, there is not
+\end_layout
+
+\begin_layout LyX-Code
+ need to search the opposite strand.
+ However, it is normally the
+\end_layout
+
+\begin_layout LyX-Code
+ case that you will wish to search both the sequence and the
+\end_layout
+
+\begin_layout LyX-Code
+ opposite strand (i.e., the reverse complement of the sequence).
+\end_layout
+
+\begin_layout LyX-Code
+ To do that, you would just use the "-c" command line.
+ For example,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches -c pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ Hits on the opposite strand will show a beginning location greater
+\end_layout
+
+\begin_layout LyX-Code
+ than the end location of the match.
+\end_layout
+
+\begin_layout LyX-Code
+Defining Pairing Rules and Allowing Mismatches, Insertions, and Deletions
+\end_layout
+
+\begin_layout LyX-Code
+ Let us stop now and ask "What additional features would one need to
+\end_layout
+
+\begin_layout LyX-Code
+ really find the kinds of loop structures that characterize tRNAs,
+\end_layout
+
+\begin_layout LyX-Code
+ rRNAs, and so forth?" I can immediately think of two:
+\end_layout
+
+\begin_layout LyX-Code
+ a) you will need to be able to allow non-standard pairings
+\end_layout
+
+\begin_layout LyX-Code
+ (those other than G-C and A-U), and
+\end_layout
+
+\begin_layout LyX-Code
+ b) you will need to be able to tolerate some number of
+\end_layout
+
+\begin_layout LyX-Code
+ mismatches and bulges.
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ Let me first show you how to handle non-standard "rules for
+\end_layout
+
+\begin_layout LyX-Code
+ pairing in reverse complements".
+ Consider the following pattern,
+\end_layout
+
+\begin_layout LyX-Code
+ which I show as two lines (you may use as many lines as you like in
+\end_layout
+
+\begin_layout LyX-Code
+ forming a pattern, although you can only break a pattern at points
+\end_layout
+
+\begin_layout LyX-Code
+ where space would be legal):
+\end_layout
+
+\begin_layout LyX-Code
+ r1={au,ua,gc,cg,gu,ug,ga,ag}
+\end_layout
+
+\begin_layout LyX-Code
+ p1=2...3 0...4 p2=2...5 1...5 r1~p2 0...4 ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ The first "pattern unit" does not actually match anything; rather,
+\end_layout
+
+\begin_layout LyX-Code
+ it defines a "pairing rule" in which standard pairings are
+\end_layout
+
+\begin_layout LyX-Code
+ allowed, as well as G-A and A-G (in case you wondered, Us and Ts
+\end_layout
+
+\begin_layout LyX-Code
+ and upper and lower case can be used interchangeably; for example
+\end_layout
+
+\begin_layout LyX-Code
+ r1={AT,UA,gc,cg} could be used to define the "standard rule" for
+\end_layout
+
+\begin_layout LyX-Code
+ pairings).
+ The second line consists of seven pattern units which
+\end_layout
+
+\begin_layout LyX-Code
+ may be interpreted as follows:
+\end_layout
+
+\begin_layout LyX-Code
+ p1=2...3 match 2 or 3 characters (call it p1)
+\end_layout
+
+\begin_layout LyX-Code
+ 0...4 match 0 to 4 characters
+\end_layout
+
+\begin_layout LyX-Code
+ p2=2...5 match 2 to 5 characters (call it p2)
+\end_layout
+
+\begin_layout LyX-Code
+ 1...5 match 1 to 5 characters
+\end_layout
+
+\begin_layout LyX-Code
+ r1~p2 match the reverse complement of p2,
+\end_layout
+
+\begin_layout LyX-Code
+ allowing G-A and A-G pairs
+\end_layout
+
+\begin_layout LyX-Code
+ 0...4 match 0 to 4 characters
+\end_layout
+
+\begin_layout LyX-Code
+ ~p1 match the reverse complement of p1
+\end_layout
+
+\begin_layout LyX-Code
+ allowing only G-C, C-G, A-T, and T-A pairs
+\end_layout
+
+\begin_layout LyX-Code
+ Thus, r1~p2 means "match the reverse complement of p2 using rule r1".
+\end_layout
+
+\begin_layout LyX-Code
+ Now let us consider the issue of tolerating mismatches and bulges.
+\end_layout
+
+\begin_layout LyX-Code
+ You may add a "qualifier" to the pattern unit that gives the
+\end_layout
+
+\begin_layout LyX-Code
+ tolerable number of "mismatches, deletions, and insertions".
+\end_layout
+
+\begin_layout LyX-Code
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=10...10 3...8 ~p1[1,2,1]
+\end_layout
+
+\begin_layout LyX-Code
+ means that the third pattern unit must match 10 characters,
+\end_layout
+
+\begin_layout LyX-Code
+ allowing one "mismatch" (a pairing other than G-C, C-G, A-T, or
+\end_layout
+
+\begin_layout LyX-Code
+ T-A), two deletions (a deletion is a character that occurs in p1,
+\end_layout
+
+\begin_layout LyX-Code
+ but has been "deleted" from the string matched by ~p1), and one
+\end_layout
+
+\begin_layout LyX-Code
+ insertion (an "insertion" is a character that occurs in the string
+\end_layout
+
+\begin_layout LyX-Code
+ matched by ~p1, but for which no corresponding character
+\end_layout
+
+\begin_layout LyX-Code
+ occurs in p1).
+ In this case, the pattern would match
+\end_layout
+
+\begin_layout LyX-Code
+ ACGTACGTAC GGGGGGGG GCGTTACCT
+\end_layout
+
+\begin_layout LyX-Code
+ which is, you must admit, a fairly weak loop.
+ It is common to
+\end_layout
+
+\begin_layout LyX-Code
+ allow mismatches, but you will find yourself using insertions and
+\end_layout
+
+\begin_layout LyX-Code
+ deletions much more rarely.
+ In any event, you should note that
+\end_layout
+
+\begin_layout LyX-Code
+ allowing mismatches, insertions, and deletions does force the
+\end_layout
+
+\begin_layout LyX-Code
+ program to try many additional possible pairings, so it does slow
+\end_layout
+
+\begin_layout LyX-Code
+ things down a bit.
+\end_layout
+
+\begin_layout LyX-Code
+How Patterns Are Matched
+\end_layout
+
+\begin_layout LyX-Code
+ Now is as good a time as any to discuss the basic flow of control
+\end_layout
+
+\begin_layout LyX-Code
+ when matching patterns.
+ Recall that a "pattern" is a sequence of
+\end_layout
+
+\begin_layout LyX-Code
+ "pattern units".
+ Suppose that the pattern units were
+\end_layout
+
+\begin_layout LyX-Code
+ u1 u2 u3 u4 ...
+ un
+\end_layout
+
+\begin_layout LyX-Code
+ The scan of a sequence S begins by setting the current position
+\end_layout
+
+\begin_layout LyX-Code
+ to 1.
+ Then, an attempt is made to match u1 starting at the
+\end_layout
+
+\begin_layout LyX-Code
+ current position.
+ Each attempt to match a pattern unit can
+\end_layout
+
+\begin_layout LyX-Code
+ succeed or fail.
+ If it succeeds, then an attempt is made to match
+\end_layout
+
+\begin_layout LyX-Code
+ the next unit.
+ If it fails, then an attempt is made to find an
+\end_layout
+
+\begin_layout LyX-Code
+ alternative match for the immediately preceding pattern unit.
+ If
+\end_layout
+
+\begin_layout LyX-Code
+ this succeeds, then we proceed forward again to the next unit.
+ If
+\end_layout
+
+\begin_layout LyX-Code
+ it fails we go back to the preceding unit.
+ This process is called
+\end_layout
+
+\begin_layout LyX-Code
+ "backtracking".
+ If there are no previous units, then the current
+\end_layout
+
+\begin_layout LyX-Code
+ position is incremented by one, and everything starts again.
+ This
+\end_layout
+
+\begin_layout LyX-Code
+ proceeds until either the current position goes past the end of
+\end_layout
+
+\begin_layout LyX-Code
+ the sequence or all of the pattern units succeed.
+ On success,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches reports the "hit", the current position is set
+\end_layout
+
+\begin_layout LyX-Code
+ just past the hit, and an attempt is made to find another hit.
+\end_layout
+
+\begin_layout LyX-Code
+ If you wish to limit the scan to simply finding a maximum of, say,
+\end_layout
+
+\begin_layout LyX-Code
+ 10 hits, you can use the -n option (-n 10 would set the limit to
+\end_layout
+
+\begin_layout LyX-Code
+ 10 reported hits).
+ For example,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches -c -n 1 pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ would search for just the first hit (and would stop searching the
+\end_layout
+
+\begin_layout LyX-Code
+ current sequences or any that follow in the input file).
+\end_layout
+
+\begin_layout LyX-Code
+Searching for repeats:
+\end_layout
+
+\begin_layout LyX-Code
+ In the last section, I discussed almost all of the details
+\end_layout
+
+\begin_layout LyX-Code
+ required to allow you to look for repeats.
+ Consider the following
+\end_layout
+
+\begin_layout LyX-Code
+ set of patterns:
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...6 3...8 p1 (find exact 6 character repeat separated
+\end_layout
+
+\begin_layout LyX-Code
+ by 3 to 8 characters)
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...6 3...8 p1[1,0,0] (allow one mismatch)
+\end_layout
+
+\begin_layout LyX-Code
+ p1=3...3 p1[1,0,0] p1[1,0,0] p1[1,0,0]
+\end_layout
+
+\begin_layout LyX-Code
+ (match 12 characters that are the remains
+\end_layout
+
+\begin_layout LyX-Code
+ of a 3-character sequence occurring 4 times)
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...8 0...3 p2=6...8 p1 0...3 p2
+\end_layout
+
+\begin_layout LyX-Code
+ (This would match things like
+\end_layout
+
+\begin_layout LyX-Code
+ ATCT G TCTTT ATCT TG TCTTT
+\end_layout
+
+\begin_layout LyX-Code
+ )
+\end_layout
+
+\begin_layout LyX-Code
+Searching for particular sequences:
+\end_layout
+
+\begin_layout LyX-Code
+ Occasionally, one wishes to match a specific, known sequence.
+\end_layout
+
+\begin_layout LyX-Code
+ In such a case, you can just give the sequence (along with an
+\end_layout
+
+\begin_layout LyX-Code
+ optional statement of the allowable mismatches, insertions, and
+\end_layout
+
+\begin_layout LyX-Code
+ deletions).
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...8 GAGA ~p1 (match a hairpin with GAGA as the loop)
+\end_layout
+
+\begin_layout LyX-Code
+ RRRRYYYY (match 4 purines followed by 4 pyrimidines)
+\end_layout
+
+\begin_layout LyX-Code
+ TATAA[1,0,0] (match TATAA, allowing 1 mismatch)
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+Matches against a "weight matrix":
+\end_layout
+
+\begin_layout LyX-Code
+ I will conclude my examples of the types of pattern units
+\end_layout
+
+\begin_layout LyX-Code
+ available for matching against nucleotide sequences by discussing a
+\end_layout
+
+\begin_layout LyX-Code
+ crude implementation of matching using a "weight matrix".
+ While I
+\end_layout
+
+\begin_layout LyX-Code
+ am less than overwhelmed with the syntax that I chose, I think that
+\end_layout
+
+\begin_layout LyX-Code
+ the reader should be aware that I was thinking of generating
+\end_layout
+
+\begin_layout LyX-Code
+ patterns containing such pattern units automatically from
+\end_layout
+
+\begin_layout LyX-Code
+ alignments (and did not really plan on typing such things in by
+\end_layout
+
+\begin_layout LyX-Code
+ hand very often).
+ Anyway, suppose that you wanted to match a
+\end_layout
+
+\begin_layout LyX-Code
+ sequence of eight characters.
+ The "consensus" of these eight
+\end_layout
+
+\begin_layout LyX-Code
+ characters is GRCACCGS, but the actual "frequencies of occurrence"
+\end_layout
+
+\begin_layout LyX-Code
+ are given in the matrix below.
+ Thus, the first character is an A
+\end_layout
+
+\begin_layout LyX-Code
+ 16% of the time and a G 84% of the time.
+ The second is an A 57% of
+\end_layout
+
+\begin_layout LyX-Code
+ the time, a C 10% of the time, a G 29% of the time, and a T 4% of
+\end_layout
+
+\begin_layout LyX-Code
+ the time.
+
+\end_layout
+
+\begin_layout LyX-Code
+ C1 C2 C3 C4 C5 C6 C7 C8
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ A 16 57 0 95 0 18 0 0
+\end_layout
+
+\begin_layout LyX-Code
+ C 0 10 80 0 100 60 0 50
+\end_layout
+
+\begin_layout LyX-Code
+ G 84 29 0 0 0 20 100 50
+\end_layout
+
+\begin_layout LyX-Code
+ T 0 4 20 5 0 2 0 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ One could use the following pattern unit to search for inexact
+\end_layout
+
+\begin_layout LyX-Code
+ matches related to such a "weight matrix":
+\end_layout
+
+\begin_layout LyX-Code
+ {(16,0,84,0),(57,10,29,4),(0,80,0,20),(95,0,0,5),
+\end_layout
+
+\begin_layout LyX-Code
+ (0,100,0,0),(18,60,20,2),(0,0,100,0),(0,50,50,0)} > 450
+\end_layout
+
+\begin_layout LyX-Code
+ This pattern unit will attempt to match exactly eight characters.
+\end_layout
+
+\begin_layout LyX-Code
+ For each character in the sequence, the entry in the corresponding
+\end_layout
+
+\begin_layout LyX-Code
+ tuple is added to an accumulated sum.
+ If the sum is greater than
+\end_layout
+
+\begin_layout LyX-Code
+ 450, the match succeeds; else it fails.
+\end_layout
+
+\begin_layout LyX-Code
+ Recently, this feature was upgraded to allow ranges.
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ 600 > {(16,0,84,0),(57,10,29,4),(0,80,0,20),(95,0,0,5),
+\end_layout
+
+\begin_layout LyX-Code
+ (0,100,0,0),(18,60,20,2),(0,0,100,0),(0,50,50,0)} > 450
+\end_layout
+
+\begin_layout LyX-Code
+ will work, as well.
+\end_layout
+
+\begin_layout LyX-Code
+Allowing Alternatives:
+\end_layout
+
+\begin_layout LyX-Code
+ Very occasionally, you may wish to allow alternative pattern units
+\end_layout
+
+\begin_layout LyX-Code
+ (i.e., "match either A or B").
+ You can do this using something
+\end_layout
+
+\begin_layout LyX-Code
+ like
+\end_layout
+
+\begin_layout LyX-Code
+ ( GAGA | GCGCA)
+\end_layout
+
+\begin_layout LyX-Code
+ which says "match either GAGA or GCGCA".
+ You may take
+\end_layout
+
+\begin_layout LyX-Code
+ alternatives of a list of pattern units, for example
+\end_layout
+
+\begin_layout LyX-Code
+ (p1=3...3 3...8 ~p1 | p1=5...5 4...4 ~p1 GGG)
+\end_layout
+
+\begin_layout LyX-Code
+ would match one of two sequences of pattern units.
+ There is one
+\end_layout
+
+\begin_layout LyX-Code
+ clumsy aspect of the syntax: to match a list of alternatives, you
+\end_layout
+
+\begin_layout LyX-Code
+ need to fully parenthesize the request.
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ (GAGA | (GCGCA | TTCGA))
+\end_layout
+
+\begin_layout LyX-Code
+ would be needed to try the three alternatives.
+\end_layout
+
+\begin_layout LyX-Code
+One Minor Extension
+\end_layout
+
+\begin_layout LyX-Code
+ Sometimes a pattern will contain a sequence of distinct ranges,
+\end_layout
+
+\begin_layout LyX-Code
+ and you might wish to limit the sum of the lengths of the matched
+\end_layout
+
+\begin_layout LyX-Code
+ subsequences.
+ For example, suppose that you basically wanted to
+\end_layout
+
+\begin_layout LyX-Code
+ match something like
+\end_layout
+
+\begin_layout LyX-Code
+ ARRYYTT p1=0...5 GCA[1,0,0] p2=1...6 ~p1 4...8 ~p2 p3=4...10 CCT
+\end_layout
+
+\begin_layout LyX-Code
+ but that the sum of the lengths of p1, p2, and p3 must not exceed
+\end_layout
+
+\begin_layout LyX-Code
+ eight characters.
+ To do this, you could add
+\end_layout
+
+\begin_layout LyX-Code
+ length(p1+p2+p3) < 9
+\end_layout
+
+\begin_layout LyX-Code
+ as the last pattern unit.
+ It will just succeed or fail (but does
+\end_layout
+
+\begin_layout LyX-Code
+ not actually match any characters in the sequence).
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+Matching Protein Sequences
+\end_layout
+
+\begin_layout LyX-Code
+ Suppose that the input file contains protein sequences.
+ In this
+\end_layout
+
+\begin_layout LyX-Code
+ case, you must invoke scan_for_matches with the "-p" option.
+ You
+\end_layout
+
+\begin_layout LyX-Code
+ cannot use aspects of the language that relate directly to
+\end_layout
+
+\begin_layout LyX-Code
+ nucleotide sequences (e.g., the -c command line option or pattern
+\end_layout
+
+\begin_layout LyX-Code
+ constructs referring to the reverse complement of a previously
+\end_layout
+
+\begin_layout LyX-Code
+ matched unit).
+
+\end_layout
+
+\begin_layout LyX-Code
+ You also have two additional constructs that allow you to match
+\end_layout
+
+\begin_layout LyX-Code
+ either "one of a set of amino acids" or "any amino acid other than
+\end_layout
+
+\begin_layout LyX-Code
+ those in a given set".
+ For example,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=0...4 any(HQD) 1...3 notany(HK) p1
+\end_layout
+
+\begin_layout LyX-Code
+ would successfully match a string like
+\end_layout
+
+\begin_layout LyX-Code
+ YWV D AA C YWV
+\end_layout
+
+\begin_layout LyX-Code
+Using the show_hits Utility
+\end_layout
+
+\begin_layout LyX-Code
+ When viewing a large set of complex matches, you might find it
+\end_layout
+
+\begin_layout LyX-Code
+ convenient to post-process the scan_for_matches output to get a
+\end_layout
+
+\begin_layout LyX-Code
+ more readable version.
+ We provide a simple post-processor called
+\end_layout
+
+\begin_layout LyX-Code
+ "show_hits".
+ To see its effect, just pipe the output of a
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches into show_hits:
+\end_layout
+
+\begin_layout LyX-Code
+ Normal Output:
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches -c pat_file < tmp
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[1,28]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacguaacc ggttaac cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[28,1]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacgtaacc ggttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ >tst2:[2,31]
+\end_layout
+
+\begin_layout LyX-Code
+ CGTACGUAAC C GGTTAACC GGUUACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ >tst2:[31,2]
+\end_layout
+
+\begin_layout LyX-Code
+ CGTACGTAAC C GGTTAACC GGTTACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ >tst3:[3,32]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacguaacc g gttaactt cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ >tst3:[32,3]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacgtaacc g aagttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ Piped Through show_hits:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches -c pat_file < tmp | show_hits
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[1,28]: gtacguaacc ggttaac cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[28,1]: gtacgtaacc ggttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[2,31]: CGTACGUAAC C GGTTAACC GGUUACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[31,2]: CGTACGTAAC C GGTTAACC GGTTACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[3,32]: gtacguaacc g gttaactt cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[32,3]: gtacgtaacc g aagttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ clone%
+\end_layout
+
+\begin_layout LyX-Code
+ Optionally, you can specify which of the "fields" in the matches
+\end_layout
+
+\begin_layout LyX-Code
+ you wish to sort on, and show_hits will sort them.
+ The field
+\end_layout
+
+\begin_layout LyX-Code
+ numbers start with 0.
+ So, you might get something like
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches -c pat_file < tmp | show_hits 2 1
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[2,31]: CGTACGUAAC C GGTTAACC GGUUACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[31,2]: CGTACGTAAC C GGTTAACC GGTTACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[32,3]: gtacgtaacc g aagttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[1,28]: gtacguaacc ggttaac cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[28,1]: gtacgtaacc ggttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[3,32]: gtacguaacc g gttaactt cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ clone%
+\end_layout
+
+\begin_layout LyX-Code
+ In this case, the hits have been sorted on fields 2 and 1 (that is,
+\end_layout
+
+\begin_layout LyX-Code
+ the third and second matched subfields).
+\end_layout
+
+\begin_layout LyX-Code
+ show_hits is just one possible little post-processor, and you
+\end_layout
+
+\begin_layout LyX-Code
+ might well wish to write a customized one for yourself.
+\end_layout
+
+\begin_layout LyX-Code
+Reducing the Cost of a Search
+\end_layout
+
+\begin_layout LyX-Code
+ The scan_for_matches utility uses a fairly simple search, and may
+\end_layout
+
+\begin_layout LyX-Code
+ consume large amounts of CPU time for complex patterns.
+ Someday,
+\end_layout
+
+\begin_layout LyX-Code
+ I may decide to optimize the code.
+ However, until then, let me
+\end_layout
+
+\begin_layout LyX-Code
+ mention one useful technique.
+
+\end_layout
+
+\begin_layout LyX-Code
+ When you have a complex pattern that includes a number of varying
+\end_layout
+
+\begin_layout LyX-Code
+ ranges, imprecise matches, and so forth, it is useful to
+\end_layout
+
+\begin_layout LyX-Code
+ "pipeline" matches.
+ That is, form a simpler pattern that can be
+\end_layout
+
+\begin_layout LyX-Code
+ used to scan through a large database extracting sections that
+\end_layout
+
+\begin_layout LyX-Code
+ might be matched by the more complex pattern.
+ Let me illustrate
+\end_layout
+
+\begin_layout LyX-Code
+ with a short example.
+ Suppose that you really wished to match the
+\end_layout
+
+\begin_layout LyX-Code
+ pattern
+\end_layout
+
+\begin_layout LyX-Code
+ p1=3...5 0...8 ~p1[1,1,0] p2=6...7 3...6 AGC 3...5 RYGC ~p2[1,0,0]
+\end_layout
+
+\begin_layout LyX-Code
+ In this case, the pattern units AGC 3...5 RYGC can be used to rapidly
+\end_layout
+
+\begin_layout LyX-Code
+ constrain the overall search.
+ You can preprocess the overall
+\end_layout
+
+\begin_layout LyX-Code
+ database using the pattern:
+\end_layout
+
+\begin_layout LyX-Code
+ 31...31 AGC 3...5 RYGC 7...7
+\end_layout
+
+\begin_layout LyX-Code
+ Put the complex pattern in pat_file1 and the simpler pattern in
+\end_layout
+
+\begin_layout LyX-Code
+ pat_file2.
+ Then use,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches -c pat_file2 < nucleotide_database |
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches pat_file1
+\end_layout
+
+\begin_layout LyX-Code
+ The output will show things like
+\end_layout
+
+\begin_layout LyX-Code
+ >seqid:[232,280][2,47]
+\end_layout
+
+\begin_layout LyX-Code
+ matches pieces
+\end_layout
+
+\begin_layout LyX-Code
+ Then, the actual section of the sequence that was matched can be
+\end_layout
+
+\begin_layout LyX-Code
+ easily computed as [233,278] (remember, the positions start from
+\end_layout
+
+\begin_layout LyX-Code
+ 1, not 0).
+\end_layout
+
+\begin_layout LyX-Code
+ Let me finally add, you should do a few short experiments to see
+\end_layout
+
+\begin_layout LyX-Code
+ whether or not such pipelining actually improves performance -- it
+\end_layout
+
+\begin_layout LyX-Code
+ is not always obvious where the time is going, and I have
+\end_layout
+
+\begin_layout LyX-Code
+ sometimes found that the added complexity of pipelining actually
+\end_layout
+
+\begin_layout LyX-Code
+ slowed things up.
+ It gets its best improvements when there are
+\end_layout
+
+\begin_layout LyX-Code
+ exact matches of more than just a few characters that can be
+\end_layout
+
+\begin_layout LyX-Code
+ rapidly used to eliminate large sections of the database.
+\end_layout
+
+\begin_layout LyX-Code
+=============
+\end_layout
+
+\begin_layout LyX-Code
+Additions:
+\end_layout
+
+\begin_layout LyX-Code
+Feb 9, 1995: the pattern units ^ and $ now work as in normal regular
+\end_layout
+
+\begin_layout LyX-Code
+ expressions.
+ That is
+\end_layout
+
+\begin_layout LyX-Code
+ TTF $
+\end_layout
+
+\begin_layout LyX-Code
+ matches only TTF at the end of the string and
+\end_layout
+
+\begin_layout LyX-Code
+ ^ TTF
+\end_layout
+
+\begin_layout LyX-Code
+ matches only an initial TTF
+\end_layout
+
+\begin_layout LyX-Code
+ The pattern unit
+\end_layout
+
+\begin_layout LyX-Code
+ <p1
+\end_layout
+
+\begin_layout LyX-Code
+ matches the reverse of the string named p1.
+ That is,
+\end_layout
+
+\begin_layout LyX-Code
+ if p1 matched GCAT, then <p1 would match TACG.
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...6 <p1
+\end_layout
+
+\begin_layout LyX-Code
+ matches a real palindrome (not the biologically common
+\end_layout
+
+\begin_layout LyX-Code
+ meaning of "reverse complement")
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\end_body
+\end_document
--- /dev/null
+#LyX 1.5.1 created this file. For more info see http://www.lyx.org/
+\lyxformat 276
+\begin_document
+\begin_header
+\textclass scrartcl
+\begin_preamble
+\usepackage[colorlinks=true, urlcolor=blue, linkcolor=black]{hyperref}
+\end_preamble
+\language english
+\inputencoding auto
+\font_roman default
+\font_sans default
+\font_typewriter default
+\font_default_family default
+\font_sc false
+\font_osf false
+\font_sf_scale 100
+\font_tt_scale 100
+\graphics default
+\paperfontsize default
+\spacing single
+\papersize default
+\use_geometry false
+\use_amsmath 1
+\use_esint 1
+\cite_engine basic
+\use_bibtopic false
+\paperorientation portrait
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation skip
+\defskip medskip
+\quotes_language english
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tracking_changes false
+\output_changes false
+\author ""
+\author ""
+\end_header
+
+\begin_body
+
+\begin_layout Title
+Biotools Cookbook
+\end_layout
+
+\begin_layout Author
+Martin Asser Hansen
+\end_layout
+
+\begin_layout Publishers
+John Mattick Group
+\newline
+Institute for Molecular Bioscience
+\newline
+University of Queensland
+\newline
+Aust
+ralia
+\newline
+E-mail: mail@maasha.dk
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+
+\backslash
+thispagestyle{empty}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
+\newpage
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset LatexCommand tableofcontents
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset FloatList figure
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
+\newpage
+
+\end_layout
+
+\begin_layout Section
+Introduction
+\end_layout
+
+\begin_layout Standard
+Biotools is a selection of simple tools that can be linked together (piped
+ as we shall call it) in a very flexible manner to perform both simple and
+ complex tasks.
+ The fundamental idea is that biotools work on a data stream that will only
+ terminate at the end of an analysis and that this data stream can be passed
+ through several different biotools, each performing one specific task.
+ The advantage of this approach is that a user can perform simple and complex
+ tasks without having to write advanced code.
+ Moreover, since the data format used to pass data between biotools is text
+ based, biotools can be written by different developers in their favorite
+ programming language --- and still the biotools will be able to work together.
+\end_layout
+
+\begin_layout Standard
+In the most simple form biotools can be piped together on the command line
+ like this (using the pipe character '|'):
+\end_layout
+
+\begin_layout LyX-Code
+read_data | calculate_something | write_result
+\end_layout
+
+\begin_layout Standard
+However, a more comprehensive analysis could be composed:
+\end_layout
+
+\begin_layout LyX-Code
+read_data | select_entries | convert_entries | search_database |
+\end_layout
+
+\begin_layout LyX-Code
+evaluate_results | plot_diagram | plot_another_diagram |
+\end_layout
+
+\begin_layout LyX-Code
+load_to_database
+\end_layout
+
+\begin_layout Standard
+The data stream that is piped through the biotools consists of records of
+ key/value pairs in the same way a hash does in order to keep as simple
+ a structure as possible.
+ An example record can be seen below:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+REC_TYPE: PATSCAN
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+MATCH: AGATCAAGTG
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+S_BEG: 7
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+S_END: 16
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ALIGN_LEN: 9
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+S_ID: piR-t6
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+STRAND: +
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+PATTERN: AGATCAAGTG
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+---
+\end_layout
+
+\begin_layout Standard
+The '
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+' denotes the delimiter of the records, and each key is a word followed
+ by a ':' and a white-space and then the value.
+ By convention the biotools only use upper case keys (a list of used keys
+ can be seen in Appendix\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sec:Keys"
+
+\end_inset
+
+).
+ Since the records basically are hash structures this means that the keys
+ in the stream are unordered, and in the above example it is
+ pure coincidence that S_BEG is displayed before S_END; however, when
+ the order of the keys is important, the biotools will automagically see
+ to that.
+\end_layout
+
+\begin_layout Standard
+All of the biotools are able to read and write a data stream to and from
+ file as long as the records are in the biotools format.
+ This means that if you are undertaking a lengthy analysis where one of
+ the steps is time consuming, you may save the stream after this step, and
+ subsequently start one or more analyses from that last step
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+It is a goal that the biotools at some point will be able to dump the data
+ stream to file in case one of the tools fails critically.
+\end_layout
+
+\end_inset
+
+.
+ If you are running a lengthy analysis it is highly recommended that you
+ create a small test sample of the data and run that through the pipeline
+ --- and once you are satisfied with the result proceed with the full data
+ set (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-select-a-few-records"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Standard
+All of the biotools can be supplied with long arguments prefixed with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+ switches or single character arguments prefixed with - switches that can
+ be grouped together (e.g.
+ -xok).
+ In this cookbook only the long switches are used to emphasize what these
+ switches do.
+\end_layout
+
+\begin_layout Section
+Setup
+\end_layout
+
+\begin_layout Standard
+In order to get the biotools to work, you need to add environment settings
+ to include the code binaries, scripts, and modules that constitute the
+ biotools package.
+ Assuming that you are using bash, add the following to your '~/.bashrc'
+ file using your favorite editor.
+ After the changes have been saved you need to either run 'source ~/.bashrc'
+ or relogin.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+if [ -f "/home/m.hansen/maasha/conf/bashrc" ]; then
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ source "/home/m.hansen/maasha/conf/bashrc"
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+fi
+\end_layout
+
+\begin_layout Section
+Getting Started
+\end_layout
+
+\begin_layout Standard
+The biotool
+\series bold
+list_biotools
+\series default
+ lists all the biotools along with a description:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+list_biotools
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+align_seq Align sequences in stream using Muscle.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+analyze_seq Analysis the residue composition of each sequence
+ in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+analyze_vals Determine type, count, min, max, sum and mean for
+ values in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+blast_seq BLAST sequences in stream against a specified database.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+blat_seq BLAT sequences in stream against a specified genome.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+complement_seq Complement sequences in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_records Count the number of records in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_seq Count sequences in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_vals Count the number of times values of given keys exists
+ in stream.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+create_blast_db Create a BLAST database from sequences in stream for
+ use with BLAST.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+...
+\end_layout
+
+\begin_layout Standard
+To list the biotools for writing different formats, you can use unix's grep
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+list_biotools | grep write
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_align Write aligned sequences in pretty alignment format.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_bed Write records from stream as BED lines.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_blast Write BLAST records from stream in BLAST tabular format
+ (-m8 and 9).
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_fasta Write sequences in FASTA format.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_psl Write records from stream in PSL format.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+write_tab Write records from stream as tab separated table.
+\end_layout
+
+\begin_layout Standard
+In order to find out how a specific biotool works, you just type the program
+ name without any arguments and press return and the usage of the biotool
+ will be displayed.
+ E.g.
+
+\series bold
+read_fasta
+\series default
+ <return>:
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Frameless
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Program name: read_fasta
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Contact: mail@maasha.dk
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Date: August 2007
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/
+gpl.html)
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Description: Read FASTA entries.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Usage: read_fasta [options] -i <FASTA file(s)>
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Options:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files
+ to read.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-n <int> | --num=<int>] - Limit number of records to read.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-I <file> | --stream_in=<file>] - Read input stream from file
+ - Default=STDIN
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ [-O <file> | --stream_out=<file>] - Write output stream to file
+ - Default=STDOUT
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+Examples:
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i test.fna - Read FASTA entries from file.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i test1.fna,test2.fna - Read FASTA entries from files.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i '*.fna' - Read FASTA entries from files.
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+ read_fasta -i test.fna -n 10 - Read first 10 FASTA entries from
+ file.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+The Data Stream
+\end_layout
+
+\begin_layout Subsection
+How to read the data stream from file?
+\begin_inset LatexCommand label
+name "sub:How-to-read-stream"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+You want to read a data stream that you previously have saved to file in
+ biotools format.
+ This can be done implicitly or explicitly.
+ The implicit way uses the 'stdout' stream of the Unix terminal:
+\end_layout
+
+\begin_layout LyX-Code
+cat <file> | <biotool>
+\end_layout
+
+\begin_layout Standard
+cat is the Unix command that reads a file and outputs the result to 'stdout'
+ --- which in this case is piped to any biotool represented by the <biotool>.
+ It is also possible to read the data stream using '<' to direct the 'stdout'
+ stream into the biotool like this:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> < <file>
+\end_layout
+
+\begin_layout Standard
+However, that will not work if you pipe more biotools together.
+ Then it is much safer to read the stream from a file explicitly like this:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --stream_in=<file>
+\end_layout
+
+\begin_layout Standard
+Here the filename <file> is explicitly given to the biotool <biotool> with
+ the switch
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_in.
+ This switch works with all biotools.
+ It is also possible to read in data from multiple sources by repeating
+ the explicit read step:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --stream_in=<file1> | <biotool> --stream_in=<file2>
+\end_layout
+
+\begin_layout Subsection
+How to write the data stream to file?
+\begin_inset LatexCommand label
+name "sub:How-to-write-stream"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+In order to save the output stream from a biotool to file, so you can read
+ in the stream again at a later time, you can do one of two things:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> > <file>
+\end_layout
+
+\begin_layout Standard
+All the biotools write the data stream to 'stdout' by default which can
+ be written to a file by redirecting 'stdout' to file using '>' , however,
+ if one of the biotools for writing other formats is used then both
+ the biotools records as well as the result output will go to 'stdout' in
+ a mixture causing havoc! To avoid this you must use the switch 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_out that explicitly tells the biotool to write the output stream to
+ file:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --stream_out=<file>
+\end_layout
+
+\begin_layout Standard
+The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_out switch works with all biotools.
+\end_layout
+
+\begin_layout Subsection
+How to terminate the data stream?
+\end_layout
+
+\begin_layout Standard
+The data stream never stops unless the user wants to save the stream or
+ supplies the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch that will terminate the stream:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --no_stream
+\end_layout
+
+\begin_layout Standard
+The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch only works with those biotools where it makes sense that
+ the user might want to terminate the data stream, 
+\emph on
+i.e
+\emph default
+.
+ after an analysis step where the user wants to output the result, but not
+ the data stream.
+\end_layout
+
+\begin_layout Subsection
+How to write my results to file?
+\begin_inset LatexCommand label
+name "sub:How-to-write-result"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Saving the result of an analysis to file can be done implicitly or explicitly.
+ The implicit way:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --no_stream > <file>
+\end_layout
+
+\begin_layout Standard
+If you use '>' to redirect 'stdout' to file then it is important to use
+ the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch to avoid writing a mix of biotools records and result to
+ the same file causing havoc.
+ The safe way is to use the
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out switch which explicitly tells the biotool to write the result
+ to a given file:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --result_out=<file>
+\end_layout
+
+\begin_layout Standard
+Using the above method will not terminate the stream, so it is possible
+ to pipe that into another biotool generating different results:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool1> --result_out=<file1> | <biotool2> --result_out=<file2>
+\end_layout
+
+\begin_layout Standard
+And still the data stream will continue unless terminated with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --result_out=<file> --no_stream
+\end_layout
+
+\begin_layout Standard
+Or written to file implicitly or explicitly 
+\begin_inset LatexCommand eqref
+reference "sub:How-to-write-result"
+
+\end_inset
+
+.
+ The explicit way:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --result_out=<file1> --stream_out=<file2>
+\end_layout
+
+\begin_layout Subsection
+How to read data from multiple sources?
+\end_layout
+
+\begin_layout Standard
+To read multiple data sources, with the same type or different type of data
+ do:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool1> --data_in=<file1> | <biotool2> --data_in=<file2>
+\end_layout
+
+\begin_layout Standard
+where type is the data type a specific biotool reads.
+\end_layout
+
+\begin_layout Section
+Reading input
+\end_layout
+
+\begin_layout Subsection
+How to read biotools input?
+\end_layout
+
+\begin_layout Standard
+See
+\begin_inset LatexCommand eqref
+reference "sub:How-to-read-stream"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Subsection
+How to read in data?
+\end_layout
+
+\begin_layout Standard
+Data in different formats can be read with the appropriate biotool for that
+ format.
+ The biotools are typically named 'read_<data type>' such as 
+\series bold
+read_fasta
+\series default
+,
+\series bold
+read_bed
+\series default
+,
+\series bold
+read_tab
+\series default
+, etc., and all behave in a similar manner.
+ Data can be read by supplying the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+data_in switch and a file name to the file containing the data:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --data_in=<file>
+\end_layout
+
+\begin_layout Standard
+It is also possible to read in a saved biotools stream (see
+\begin_inset LatexCommand ref
+reference "sub:How-to-read-stream"
+
+\end_inset
+
+) as well as reading data in one go:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --stream_in=<file1> --data_in=<file2>
+\end_layout
+
+\begin_layout Standard
+If you want to read data from several files you can do this:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --data_in=<file1> | <biotool> --data_in=<file2>
+\end_layout
+
+\begin_layout Standard
+If you have several data files you can read in all explicitly with a comma
+ separated list:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --data_in=file1,file2,file3
+\end_layout
+
+\begin_layout Standard
+And it is also possible to use file globbing
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+using the short option will only work if you quote the argument -i '*.fna'
+\end_layout
+
+\end_inset
+
+:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --data_in=*.fna
+\end_layout
+
+\begin_layout Standard
+Or in a combination:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool> --data_in=file1,/dir/*.fna
+\end_layout
+
+\begin_layout Standard
+Finally, it is possible to read in data in different formats using the appropria
+te biotool for each format:
+\end_layout
+
+\begin_layout LyX-Code
+<biotool1> --data_in=<file1> | <biotool2> --data_in=<file2> ...
+\end_layout
+
+\begin_layout Subsection
+How to read FASTA input?
+\end_layout
+
+\begin_layout Standard
+Sequences in FASTA format can be read explicitly using
+\series bold
+read_fasta
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file>
+\end_layout
+
+\begin_layout Subsection
+How to read alignment input?
+\end_layout
+
+\begin_layout Standard
+If your alignment is FASTA formatted then you can use 
+\series bold
+read_align
+\series default
+.
+ It is also possible to use
+\series bold
+read_fasta
+\series default
+ since the data is FASTA formatted, however, with
+\series bold
+read_fasta
+\series default
+ the key ALIGN will be omitted.
+ The ALIGN key is used to determine which sequences belong to what alignment
+ which is required for
+\series bold
+write_align
+\series default
+.
+\end_layout
+
+\begin_layout LyX-Code
+read_align --data_in=<file>
+\end_layout
+
+\begin_layout Subsection
+How to read tabular input?
+\begin_inset LatexCommand label
+name "sub:How-to-read-table"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Tabular input can be read with
+\series bold
+read_tab
+\series default
+ which will read in all rows and chosen columns (separated by a given delimiter)
+ from a table in text format.
+\end_layout
+
+\begin_layout Standard
+The table below:
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="4" columns="3">
+<features>
+<column alignment="left" valignment="top" width="0">
+<column alignment="left" valignment="top" width="0">
+<column alignment="left" valignment="top" width="0">
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Human
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+ATACGTCAG
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+23524
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Dog
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+AGCATGAC
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+2442
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Mouse
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+GACTG
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+234
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Cat
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+AAATGCA
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+2342
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Can be read using the command:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file>
+\end_layout
+
+\begin_layout Standard
+Which will result in four records, one for each row, where the keys V0,
+ V1, V2 are the default keys for the organism, sequence, and count, respectively.
+ It is possible to select a subset of columns to read by using the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+cols switch which takes a comma separated list of column numbers (first
+ column is designated 0) as argument.
+ So to read in only the sequence and the count so that the count comes before
+ the sequence do:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file> --cols=2,1
+\end_layout
+
+\begin_layout Standard
+It is also possible to name the columns with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys switch:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file> --cols=2,1 --keys=COUNT,SEQ
+\end_layout
+
+\begin_layout Subsection
+How to read BED input?
+\end_layout
+
+\begin_layout Standard
+The BED (Browser Extensible Data
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://genome.ucsc.edu/FAQ/FAQformat"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+) format is a tabular format for data pertaining to one of the Eukaryotic
+ genomes in the UCSC genome browser
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://genome.ucsc.edu/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+.
+ The BED format consists of up to 12 columns, where the first three are
+ mandatory CHR, CHR_BEG, and CHR_END.
+ The mandatory columns and any of the optional columns can all be read in
+ easily with the
+\series bold
+read_bed
+\series default
+ biotool.
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<file>
+\end_layout
+
+\begin_layout Standard
+It is also possible to read the BED file with
+\series bold
+read_tab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-read-table"
+
+\end_inset
+
+), however, that will be more cumbersome because you need to specify the
+ keys:
+\end_layout
+
+\begin_layout LyX-Code
+read_tab --data_in=<file> --keys=CHR,CHR_BEG,CHR_END ...
+\end_layout
+
+\begin_layout Subsection
+How to read PSL input?
+\end_layout
+
+\begin_layout Standard
+The PSL format is the output from BLAT and contains 21 mandatory fields
+ that can be read with
+\series bold
+read_psl
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+read_psl --data_in=<file>
+\end_layout
+
+\begin_layout Section
+Writing output
+\end_layout
+
+\begin_layout Standard
+All result output can be written explicitly to file using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out switch which all result generating biotools have.
+ It is also possible to write the result to file implicitly by directing
+ 'stdout' to file using '>', however, that requires the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch to prevent a mixture of data stream and results in the file.
+ The explicit (and safe) way:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | <biotool> --result_out=<file>
+\end_layout
+
+\begin_layout Standard
+The implicit way:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | <biotool> --no_stream > <file>
+\end_layout
+
+\begin_layout Subsection
+How to write biotools output?
+\end_layout
+
+\begin_layout Standard
+See
+\begin_inset LatexCommand eqref
+reference "sub:How-to-write-stream"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Subsection
+How to write FASTA output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-fasta"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+FASTA output can be written with
+\series bold
+write_fasta
+\series default
+.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_fasta --result_out=<file>
+\end_layout
+
+\begin_layout Standard
+It is also possible to wrap the sequences to a given width using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+wrap switch although wrapping of sequence is generally an evil thing:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_fasta --no_stream --wrap=80
+\end_layout
+
+\begin_layout Subsection
+How to write alignment output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-alignment"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Pretty alignments with ruler
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+'.' for every 10 residues, ':' for every 50, and '|' for every 100
+\end_layout
+
+\end_inset
+
+ and consensus sequence
+\begin_inset Note Note
+status collapsed
+
+\begin_layout Standard
+which reminds me to make that an option.
+\end_layout
+
+\end_inset
+
+ can be created with
+\series bold
+write_align
+\series default
+, which also has the optional 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+wrap switch to break the alignment into blocks of a given width:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_align --result_out=<file> --wrap=80
+\end_layout
+
+\begin_layout Standard
+If the number of sequences in the alignment is 2 then a pairwise alignment
+ will be output otherwise a multiple alignment.
+ And if the sequence type, determined automagically, is protein, then residues
+ and symbols (+,\InsetSpace ~
+:,\InsetSpace ~
+.) will be used to show consensus according to the Blosum62
+ matrix.
+\end_layout
+
+\begin_layout Subsection
+How to write tabular output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-tab"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Outputting the data stream as a table can be done with
+\series bold
+write_tab
+\series default
+, which will generate one row per record with the values as columns.
+ If you supply the optional
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+comment switch, then the first row in the table will be a 'comment' line
+ prefixed with a '#':
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --comment
+\end_layout
+
+\begin_layout Standard
+You can also change the delimiter from the default (tab) to
+\emph on
+e.g.
+
+\emph default
+ ',':
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --delimit=','
+\end_layout
+
+\begin_layout Standard
+If you want the values output in a specific order you have to supply a comma
+ separated list using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys switch that will print only those keys in that order:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --keys=SEQ_NAME,COUNT
+\end_layout
+
+\begin_layout Standard
+Alternatively, if you have some keys that you don't want in the tabular
+ output, use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_keys switch.
+ So to print all keys except SEQ and SEQ_TYPE do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | write_tab --result_out=<file> --no_keys=SEQ,SEQ_TYPE
+\end_layout
+
+\begin_layout Standard
+Finally, if you have a stream containing a mix of different records types,
+
+\emph on
+e.g.
+
+\emph default
+ records with sequences and records with matches, then you can use
+\series bold
+write_tab
+\series default
+ to output all the records in tabular format, however, the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+comment,
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys, and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_keys switches will only respond to records of the first type encountered.
+ The reason is that outputting mixed records is probably not what you want
+ anyway, and you should remove all the unwanted records from the stream
+ before outputting the table:
+\series bold
+grab
+\series default
+ is your friend (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-grab"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to write a BED output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-BED"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Data in BED format can be output if the records contain the mandatory keys
+ CHR, CHR_BEG, and CHR_END using
+\series bold
+write_bed
+\series default
+.
+ If the optional keys are also present, they will be output as well:
+\end_layout
+
+\begin_layout LyX-Code
+write_bed --result_out=<file>
+\end_layout
+
+\begin_layout Subsection
+How to write PSL output?
+\begin_inset LatexCommand label
+name "sub:How-to-write-PSL"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Data in PSL format can be output using
+\series bold
+write_psl
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+write_psl --result_out=<file>
+\end_layout
+
+\begin_layout Section
+Manipulating Records
+\end_layout
+
+\begin_layout Subsection
+How to select a few records?
+\begin_inset LatexCommand label
+name "sub:How-to-select-a-few-records"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+To quickly get an overview of your data you can limit the data stream to
+ show a few records.
+ This is also very useful to test the pipeline with a few records if you are
+ setting up a complex analysis using several biotools.
+ That way you can inspect that all goes well before analyzing and waiting
+ for the full data set.
+ All of the read_<type> biotools have the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num switch which will take a number as argument and only that number of
+ records will be read.
+ So to read in the first 10 FASTA entries from a file:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna --num=10
+\end_layout
+
+\begin_layout Standard
+Another way of doing this is to use
+\series bold
+head_records
+\series default
+, which will limit the stream to show the first 10 records (default):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | head_records
+\end_layout
+
+\begin_layout Standard
+Using
+\series bold
+head_records
+\series default
+ directly after one of the read_<type> biotools will be a lot slower than
+ using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num switch with the read_<type> biotools, however,
+\series bold
+head_records
+\series default
+ can also be used to limit the output from all the other biotools.
+ It is also possible to give
+\series bold
+head_records
+\series default
+ a number of records to show using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num switch.
+ So to display the first 100 records do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | head_records --num=100
+\end_layout
+
+\begin_layout Subsection
+How to select random records?
+\begin_inset LatexCommand label
+name "sub:How-to-select-random-records"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+If you want to inspect a number of random records from the stream this can
+ be done with the
+\series bold
+random_records
+\series default
+ biotool.
+ So if you have 1 mio records in the stream and you want to select 1000
+ random records do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | random_records --num=1000
+\end_layout
+
+\begin_layout Subsection
+How to count all records in the data stream?
+\end_layout
+
+\begin_layout Standard
+To count all the records in the data stream use
+\series bold
+count_records
+\series default
+, which adds one record (which is not included in the count) to the data
+ stream.
+ So to count the number of sequences in a FASTA file you can do this:
+\end_layout
+
+\begin_layout LyX-Code
+cat test.fna | read_fasta | count_records --no_stream
+\end_layout
+
+\begin_layout Standard
+Which will write the last record containing the count to 'stdout':
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+count_records: 630
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+---
+\end_layout
+
+\begin_layout Standard
+It is also possible to write the count to file using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out switch.
+\end_layout
+
+\begin_layout Subsection
+How to get the length of record values?
+\begin_inset LatexCommand label
+name "sub:How-to-get-value_length"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Use the
+\series bold
+length_vals
+\series default
+ biotool to get the length of each value for a comma separated list of keys:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | length_vals --keys=HIT,PATTERN
+\end_layout
+
+\begin_layout Subsection
+How to grab specific records?
+\begin_inset LatexCommand label
+name "sub:How-to-grab"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The biotool
+\series bold
+grab
+\series default
+ is related to the Unix grep and locates records based on matching keys
+ and/or values using either a pattern, a Perl regex, or a numerical evaluation.
+ To easily
+\series bold
+grab
+\series default
+ all records in the stream that have any mention of the pattern 'human'
+ just pipe the data stream through
+\series bold
+grab
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human
+\end_layout
+
+\begin_layout Standard
+This will search for the pattern 'human' in all keys and all values.
+ The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern switch takes a comma separated list of patterns, so in order to
+ match multiple patterns do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human,mouse
+\end_layout
+
+\begin_layout Standard
+It is also possible to use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in switch instead of
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern.
+
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in is used to read a file with one pattern per line:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern_in=patterns.txt
+\end_layout
+
+\begin_layout Standard
+If you want the opposite result --- to find all records that do not match
+ the patterns, add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+invert switch, which not only works with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern switch, but also with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+regex and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+eval:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human --invert
+\end_layout
+
+\begin_layout Standard
+If you want to search the record keys only,
+\emph on
+e.g.
+
+\emph default
+ to find all records containing the key SEQ you can add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys_only switch.
+ This will prevent matching of SEQ in any record value; since SEQ is in
+ fact a not uncommon peptide sequence, you could otherwise get an unwanted record.
+ Also, this will give an increase in speed since only the keys are searched:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=SEQ --keys_only
+\end_layout
+
+\begin_layout Standard
+However, if you are interested in finding the peptide sequence SEQ and not
+ the SEQ key, just add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+vals_only switch instead:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=SEQ --vals_only
+\end_layout
+
+\begin_layout Standard
+Also, if you want to grab for certain key/value pairs you can supply a comma
+ separated list of keys whose values will then be searched using the 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+keys switch.
+ This is handy if your records contain large genomic sequences and you don't
+ want to search the entire sequence for
+\emph on
+e.g.
+
+\emph default
+ the organism name --- it is much faster to tell
+\series bold
+grab
+\series default
+ which keys to search the value for:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern=human --keys=SEQ_NAME
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout Standard
+It is also possible to invoke flexible matching using regex (regular expressions
+) instead of simple pattern matching.
+ In
+\series bold
+grab
+\series default
+ the regex engine is Perl based and allows use of different types of
+ wildcards, alternatives,
+\emph on
+etc
+\emph default
+
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://perldoc.perl.org/perlreref.html"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+.
+ If you want to
+\series bold
+grab
+\series default
+ records with the sequence ATCG or GCTA you can do this:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --regex='ATCG|GCTA'
+\end_layout
+
+\begin_layout Standard
+Or if you want to find sequences beginning with ATCG:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --regex='^ATCG'
+\end_layout
+
+\begin_layout Standard
+You can also use
+\series bold
+grab
+\series default
+ to locate records that fulfill a numerical property using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+eval switch, which takes an expression in three parts.
+ The first part is the key that holds the value we want to evaluate, the
+ second part holds one of the eight operators:
+\end_layout
+
+\begin_layout Enumerate
+Greater than: >
+\end_layout
+
+\begin_layout Enumerate
+Greater than or equal to: >=
+\end_layout
+
+\begin_layout Enumerate
+Less than: <
+\end_layout
+
+\begin_layout Enumerate
+Less than or equal to: <=
+\end_layout
+
+\begin_layout Enumerate
+Equal to: =
+\end_layout
+
+\begin_layout Enumerate
+Not equal to: !=
+\end_layout
+
+\begin_layout Enumerate
+String wise equal to: eq
+\end_layout
+
+\begin_layout Enumerate
+String wise not equal to: ne
+\end_layout
+
+\begin_layout Standard
+And finally comes the number used in the evaluation.
+ So to
+\series bold
+grab
+\series default
+ all records with a sequence length greater than 30:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | length_seq | grab --eval='SEQ_LEN > 30'
+\end_layout
+
+\begin_layout Standard
+If you want to locate all records containing the pattern 'human' and where
+ the sequence length is greater than 30, you do this by running the stream
+ through
+\series bold
+grab
+\series default
+ twice:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --pattern='human' | length_seq | grab --eval='SEQ_LEN > 30'
+\end_layout
+
+\begin_layout Standard
+Finally, it is possible to do fast matching of expressions from a file using
+ the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+exact switch.
+ Each of these expressions has to be matched exactly over the entire length,
+ which is useful if you have a file with accession numbers that you want
+ to locate in the stream:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | grab --exact acc_no.txt | ...
+\end_layout
+
+\begin_layout Standard
+Using
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+exact is much faster than using
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in, because with
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+exact the expressions have to be complete matches, whereas
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in looks for subpatterns.
+\end_layout
+
+\begin_layout Standard
+NB! To get the best speed performance, use the most restrictive
+\series bold
+grab
+\series default
+ first.
+\end_layout
+
+\begin_layout Subsection
+How to remove keys from records?
+\end_layout
+
+\begin_layout Standard
+To remove one or more specific keys from all records in the data stream
+ use
+\series bold
+remove_keys
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | remove_keys --keys=SEQ,SEQ_NAME
+\end_layout
+
+\begin_layout Standard
+In the above example SEQ and SEQ_NAME will be removed from all records if
+ they exist in these.
+ If all keys are removed from a record, then the record will be removed.
+\end_layout
+
+\begin_layout Subsection
+How to rename keys in records?
+\end_layout
+
+\begin_layout Standard
+Sometimes you want to rename a record key,
+\emph on
+e.g.
+
+\emph default
+ if you have read in a two column table with sequence name and sequence
+ in each column (see
+\begin_inset LatexCommand ref
+reference "sub:How-to-read-table"
+
+\end_inset
+
+) without specifying the key names, then the sequence name will be called
+ V0 and the sequence V1 as default in the
+\series bold
+read_tab
+\series default
+ biotool.
+ To rename the V0 and V1 keys we need to run the stream through
+\series bold
+rename_keys
+\series default
+ twice (once for each key to rename):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | rename_keys --keys=V0,SEQ_NAME | rename_keys --keys=V1,SEQ
+\end_layout
+
+\begin_layout Standard
+The first instance of
+\series bold
+rename_keys
+\series default
+ replaces all the V0 keys with SEQ_NAME, and the second instance of
+\series bold
+rename_keys
+\series default
+ replaces all the V1 keys with SEQ.
+
+\emph on
+Et voilà
+\emph default
+ the data can now be used in the biotools that require these keys.
+\end_layout
+
+\begin_layout Section
+Manipulating Sequences
+\end_layout
+
+\begin_layout Subsection
+How to get sequence lengths?
+\end_layout
+
+\begin_layout Standard
+The length for sequences in records can be determined with
+\series bold
+length_seq
+\series default
+, which adds the key SEQ_LEN to each record with the sequence length as
+ the value.
+ It also generates an extra record that is emitted last with the key TOTAL_SEQ_L
+EN showing the total length of all the sequences.
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_seq
+\end_layout
+
+\begin_layout Standard
+It is also possible to determine the sequence length using the generic tool
+
+\series bold
+length_vals
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-get-value_length"
+
+\end_inset
+
+, which determines the length of the values for a given list of keys:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_vals --keys=SEQ
+\end_layout
+
+\begin_layout Standard
+To obtain the total length of all sequences use
+\series bold
+sum_vals
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_vals --keys=SEQ
+\end_layout
+
+\begin_layout LyX-Code
+| sum_vals --keys=SEQ_LEN
+\end_layout
+
+\begin_layout Standard
+The biotool
+\series bold
+analyze_seq
+\series default
+ will also determine the length of each sequence (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-analyze"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to analyze sequence composition?
+\begin_inset LatexCommand label
+name "sub:How-to-analyze"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+If you want to find out the sequence type, composition, length, as well
+ as GC content, indel content and proportions of soft and hard masked sequence,
+ then use
+\series bold
+analyze_seq
+\series default
+.
+ This handy biotool will determine all these things per sequence from which
+ it is easy to get an overview using the
+\series bold
+write_tab
+\series default
+ biotool to output a table (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-tab"
+
+\end_inset
+
+).
+ So in order to determine the sequence composition of a FASTA file with
+ just one entry containing the sequence 'ATCG' we just read the data with
+
+\series bold
+read_fasta
+\series default
+ and run the output through
+\series bold
+analyze_seq
+\series default
+ which will add the analysis to the record like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq ...
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:D: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+MIX_INDEX: 0.55
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:W: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:G: 16
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SOFT_MASK%: 63.75
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:B: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:V: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+HARD_MASK%: 0.00
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:H: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:S: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:N: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:.: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+GC%: 35.00
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:A: 8
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:Y: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:M: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:T: 44
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SEQ_TYPE: DNA
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:K: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:~: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SEQ: TTTCAGTTTGGGACGGAGTAAGGCCTTCCtttttttttttttttttttttttttttttgagaccgagtcttgctc
+tgtcg
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+SEQ_LEN: 80
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:R: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:C: 12
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:-: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+RES:U: 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\size scriptsize
+---
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout Standard
+Now to make a table of how many As, Ts, Cs, and Gs you can add the following:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | analyze_seq | write_tab --keys=RES:A,RES:T,RES:C,RES:G
+\end_layout
+
+\begin_layout Standard
+Or if you want to see the proportions of hard and soft masked sequence:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | analyze_seq | write_tab --keys=HARD_MASK%,SOFT_MASK%
+\end_layout
+
+\begin_layout Standard
+If you have a stack of sequences in one file and you want to determine the
+ mean GC content you can do it using the
+\series bold
+mean_vals
+\series default
+ biotool:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq | mean_vals --keys=GC%
+\end_layout
+
+\begin_layout Standard
+Or if you want the total count of Ns you can use
+\series bold
+sum_vals
+\series default
+ like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq | sum_vals --keys=RES:N
+\end_layout
+
+\begin_layout Standard
+The MIX_INDEX key is calculated as the count of the most common residue
+ over the sequence length, and can be used as a cut-off for removing sequence
+ tags consisting of mostly one nucleotide:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | analyze_seq | grab --eval='MIX_INDEX<0.85'
+\end_layout
+
+\begin_layout Subsection
+How to extract subsequences?
+\begin_inset LatexCommand label
+name "sub:How-to-extract"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+In order to extract a subsequence from a longer sequence use the biotool
+ extract_seq, which will replace the sequence in the record with the subsequence
+ (this behaviour should probably be modified to be dependent on a
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+replace or a
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_replace switch
+\begin_inset Note Note
+status collapsed
+
+\begin_layout Standard
+also in split_seq
+\end_layout
+
+\end_inset
+
+).
+ So to extract the first 20 residues from all sequences do (first residue
+ is designated 1):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | extract_seq --beg=1 --len=20
+\end_layout
+
+\begin_layout Standard
+You can also specify a begin and end coordinate set:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | extract_seq --beg=20 --end=40
+\end_layout
+
+\begin_layout Standard
+If you want the subsequences from position 20 to the sequence end do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | extract_seq --beg=20
+\end_layout
+
+\begin_layout Standard
+If you want to extract subsequences a given distance from the sequence end
+ you can do this by reversing the sequence with the biotool
+\series bold
+reverse_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-reverse-seq"
+
+\end_inset
+
+, followed by
+\series bold
+extract_seq
+\series default
+ to get the subsequence, and then
+\series bold
+reverse_seq
+\series default
+ again to get the subsequence back in the original orientation:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=test.fna | reverse_seq
+\end_layout
+
+\begin_layout LyX-Code
+| extract_seq --beg=10 --len=10 | reverse_seq
+\end_layout
+
+\begin_layout Subsection
+How to get genomic sequence?
+\begin_inset LatexCommand label
+name "sub:How-to-get-genomic-sequence"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The biotool
+\series bold
+get_genome_seq
+\series default
+ can extract subsequences for a given genome specified with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+genome switch explicitly using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+beg and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+end/
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+len switches:
+\end_layout
+
+\begin_layout LyX-Code
+get_genome_seq --genome=<genome> --beg=1 --len=100
+\end_layout
+
+\begin_layout Standard
+Alternatively,
+\series bold
+get_genome_seq
+\series default
+ can be used to append the corresponding sequence to BED, PSL, and BLAST
+ records:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<BED file> | get_genome_seq --genome=<genome>
+\end_layout
+
+\begin_layout Standard
+It is also possible to include flanking sequence using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+flank switch.
+ So to include 50 nucleotides upstream and 50 nucleotides downstream for
+ each BED entry do:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<BED file> | get_genome_seq --genome=<genome> --flank=50
+\end_layout
+
+\begin_layout Subsection
+How to upper-case sequences?
+\end_layout
+
+\begin_layout Standard
+Sequences can be shifted from lower case to upper case using
+\series bold
+uppercase_seq
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | uppercase_seq
+\end_layout
+
+\begin_layout Subsection
+How to reverse sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-reverse-seq"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The order of residues in a sequence can be reversed using reverse_seq:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | reverse_seq
+\end_layout
+
+\begin_layout Standard
+Note that in order to reverse/complement a sequence you also need the
+\series bold
+complement_seq
+\series default
+ biotool (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-complement"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to complement sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-complement"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+DNA and RNA sequences can be complemented with
+\series bold
+complement_seq
+\series default
+, which automagically determines the sequence type:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | complement_seq
+\end_layout
+
+\begin_layout Standard
+Note that in order to reverse/complement a sequence you also need the
+\series bold
+reverse_seq
+\series default
+ biotool (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-reverse-seq"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to remove indels from sequences?
+\end_layout
+
+\begin_layout Standard
+Indels can be removed from sequences with the
+\series bold
+remove_indels
+\series default
+ biotool.
+ This is useful if you have aligned some sequences (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-align"
+
+\end_inset
+
+) and extracted (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-extract"
+
+\end_inset
+
+) a block of subsequences from the alignment and you want to use these sequences
+ in a search where you need to remove the indels first.
+ '-', '~', and '.' are considered indels:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | remove_indels
+\end_layout
+
+\begin_layout Subsection
+How to shuffle sequences?
+\end_layout
+
+\begin_layout Standard
+All residues in sequences in the stream can be shuffled to random positions
+ using the
+\series bold
+shuffle_seq
+\series default
+ biotool:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | shuffle_seq
+\end_layout
+
+\begin_layout Subsection
+How to split sequences into overlapping subsequences?
+\end_layout
+
+\begin_layout Standard
+Sequences can be split into overlapping subsequences with the
+\series bold
+split_seq
+\series default
+ biotool.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | split_seq --word_size=20 --uniq
+\end_layout
+
+\begin_layout Subsection
+How to determine the oligo frequency?
+\end_layout
+
+\begin_layout Standard
+In order to determine if any oligo usage is over represented in one or more
+ sequences you can determine the frequency of oligos of a given size with
+
+\series bold
+oligo_freq
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | oligo_freq --word_size=4
+\end_layout
+
+\begin_layout Standard
+And if you have more than one sequence and want to accumulate the frequencies
+ you need the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+all switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | oligo_freq --word_size=4 --all
+\end_layout
+
+\begin_layout Standard
+To get a meaningful result you need to write the resulting frequencies as
+ a table with
+\series bold
+write_tab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-tab"
+
+\end_inset
+
+), but first it is important to
+\series bold
+grab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-grab"
+
+\end_inset
+
+) the records with the frequencies to avoid full length sequences in the
+ table:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | oligo_freq --word_size=4 --all | grab --pattern=OLIGO --keys_only
+\end_layout
+
+\begin_layout LyX-Code
+| write_tab --no_stream
+\end_layout
+
+\begin_layout Standard
+And the resulting frequency table can be sorted with Unix sort (man sort).
+\end_layout
+
+\begin_layout Subsection
+How to search for sequences in genomes?
+\end_layout
+
+\begin_layout Standard
+See the following biotools:
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+patscan_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-patscan"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+blat_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAT"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+blast_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAST"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+
+\series bold
+vmatch_seq
+\series default
+
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-Vmatch"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to search sequences for a pattern?
+\begin_inset LatexCommand label
+name "sub:How-to-use-patscan"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+It is possible to search sequences in the data stream for patterns using
+ the
+\series bold
+patscan_seq
+\series default
+ biotool which utilizes the powerful scan_for_matches engine.
+ Consult the documentation for scan_for_matches in order to learn how to
+ define patterns (the documentation is included in Appendix\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sec:scan_for_matches-README"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Standard
+To search all sequences for a simple pattern consisting of the sequence
+ ATCGATCG allowing for 3 mismatches, 2 insertions and 1 deletion:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | patscan_seq --pattern='ATCGATCG[3,2,1]'
+\end_layout
+
+\begin_layout Standard
+The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern switch takes a comma separated list of patterns, so if you want
+ to search for more than one pattern do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern='ATCGATCG[3,2,1],GCTAGCTA[3,2,1]'
+\end_layout
+
+\begin_layout Standard
+It is also possible to have a list of different patterns to search for in
+ a file with one pattern per line.
+ In order to get
+\series bold
+patscan_seq
+\series default
+ to read these patterns use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+pattern_in switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern_in=<file>
+\end_layout
+
+\begin_layout Standard
+To also scan the complementary strand in nucleotide sequences (
+\series bold
+patscan_seq
+\series default
+ automagically determines the sequence type) you need to add the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+comp switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern=<pattern> --comp
+\end_layout
+
+\begin_layout Standard
+It is also possible to use
+\series bold
+patscan_seq
+\series default
+ to output those records that do not contain a certain pattern by using
+ the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+invert switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | patscan_seq --pattern=<pattern> --invert
+\end_layout
+
+\begin_layout Standard
+Finally,
+\series bold
+patscan_seq
+\series default
+ can also scan for patterns in a given genome sequence, instead of sequences
+ in the stream, using the
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+genome switch:
+\end_layout
+
+\begin_layout LyX-Code
+patscan_seq --pattern=<pattern> --genome=<genome>
+\end_layout
+
+\begin_layout Subsection
+How to use BLAT for sequence search?
+\begin_inset LatexCommand label
+name "sub:How-to-use-BLAT"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Sequences in the data stream can be matched against supported genomes using
+
+\series bold
+blat_seq
+\series default
+ which is a biotool using BLAT as the name might suggest.
+ Currently only Mouse and Human genomes are available and it is not possible
+ to use OOC files since there is still a need for a local repository for
+ genome files.
+ Otherwise it is just:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | blat_seq --genome=<genome>
+\end_layout
+
+\begin_layout Standard
+The search results can then be written to file with
+\series bold
+write_psl
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-PSL"
+
+\end_inset
+
+) or
+\series bold
+write_bed
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-BED"
+
+\end_inset
+
+), although with
+\series bold
+write_bed
+\series default
+ some information will be lost.
+ It is also possible to plot chromosome distribution of the search results
+ using
+\series bold
+plot_chrdist
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-plot-chrdist"
+
+\end_inset
+
+) or the distribution of the match lengths using
+\series bold
+plot_lendist
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-plot-lendist"
+
+\end_inset
+
+) or a karyogram with the hits using
+\series bold
+plot_karyogram
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-plot-karyogram"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to use BLAST for sequence search?
+\begin_inset LatexCommand label
+name "sub:How-to-use-BLAST"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Two biotools exist for blasting sequences:
+\series bold
+create_blast_db
+\series default
+ is used to create the BLAST database required for BLAST which is queried
+ using the biotool
+\series bold
+blast_seq
+\series default
+.
+ So in order to create a BLAST database from sequences in the data stream
+ you simply run:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_blast_db --database=my_database --no_stream
+\end_layout
+
+\begin_layout Standard
+The type of sequence to use for the database is automagically determined
+ by
+\series bold
+create_blast_db
+\series default
+, but don't have a mixture of peptide and nucleic acid sequences in the
+ stream.
+ The
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+database switch takes a path as argument, but will default to 'blastdb_<time_sta
+mp>' if not set.
+\end_layout
+
+\begin_layout Standard
+The resulting database can now be queried with sequences in another data
+ stream using
+\series bold
+blast_seq
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | blast_seq --database=my_database
+\end_layout
+
+\begin_layout Standard
+Again, the sequence type is determined automagically and the appropriate
+ BLAST program is guessed (see the table below); however, the program name can
+ be overruled with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+program switch.
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="5" columns="3">
+<features>
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<row bottomline="true">
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Subject sequence
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Query sequence
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Program guess
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+blastn
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+blastp
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+blastx
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Nucleotide
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+Protein
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Standard
+tblastn
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Finally, it is also possible to use
+\series bold
+blast_seq
+\series default
+ for blasting sequences against a preformatted genome using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+genome switch instead of the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+database switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | blast_seq --genome=<genome>
+\end_layout
+
+\begin_layout Subsection
+How to use Vmatch for sequence search?
+\begin_inset LatexCommand label
+name "sub:How-to-use-Vmatch"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The powerful suffix array software package Vmatch
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://www.vmatch.de/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+ can be used for exact mapping of sequences against indexed genomes using
+ the biotool
+\series bold
+vmatch_seq
+\series default
+, which will e.g.
+ map 700000 ESTs to the human genome locating all 160 million hits in less than
+ an hour.
+ Only nucleotide sequences and sequences longer than 11 nucleotides will
+ be mapped.
+ It is recommended that sequences consisting of mostly one nucleotide type
+ are removed.
+ This can be done with the
+\series bold
+analyze_seq
+\series default
+ biotool
+\begin_inset LatexCommand eqref
+reference "sub:How-to-analyze"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome>
+\end_layout
+
+\begin_layout Standard
+It is also possible to allow for mismatches using the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+hamming_dist switch.
+ So to allow for 2 mismatches:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --hamming_dist=2
+\end_layout
+
+\begin_layout Standard
+Or to allow for 10% mismatching nucleotides:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --hamming_dist=10p
+\end_layout
+
+\begin_layout Standard
+To allow both indels and mismatches use the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+edit_dist switch.
+ So to allow for one mismatch or one indel:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --edit_dist=1
+\end_layout
+
+\begin_layout Standard
+Or to allow for 5% indels or mismatches:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | vmatch_seq --genome=<genome> --edit_dist=5p
+\end_layout
+
+\begin_layout Standard
+Note that using
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+hamming_dist or
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+edit_dist slows down vmatch considerably --- use with care.
+\end_layout
+
+\begin_layout Standard
+The resulting SCORE key can be replaced to hold the number of genome matches
+ of a given sequence (multi-mappers) if the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+count switch is given.
+\end_layout
+
+\begin_layout Subsection
+How to find all matches between sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-find-matches"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+All matches between two sequences can be determined with the biotool
+\series bold
+match_seq
+\series default
+.
+ The match finding engine underneath the hood of
+\series bold
+match_seq
+\series default
+ is the super fast suffix tree program MUMmer
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://mummer.sourceforge.net/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+, which will locate all forward and reverse matches between huge sequences
+ in a matter of minutes (if the repeat count is not too high and if the
+ word size used is appropriate).
+ Matching two
+\emph on
+Helicobacter pylori
+\emph default
+ genomes (1.7Mbp) takes around 10 seconds:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | match_seq --word_size=20 --direction=both
+\end_layout
+
+\begin_layout Standard
+The output from
+\series bold
+match_seq
+\series default
+ can be used to generate a dot plot with
+\series bold
+plot_matches
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-generate-dotplot"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to align sequences?
+\begin_inset LatexCommand label
+name "sub:How-to-align"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Sequences in the stream can be aligned with the
+\series bold
+align_seq
+\series default
+ biotool that uses Muscle
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://www.drive5.com/muscle/muscle.html"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+ as alignment engine.
+ Currently you cannot change any of the Muscle alignment parameters and
+
+\series bold
+align_seq
+\series default
+ will create an alignment based on the defaults (which are really good!):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | align_seq
+\end_layout
+
+\begin_layout Standard
+The aligned output can be written to file in FASTA format using
+\series bold
+write_fasta
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-fasta"
+
+\end_inset
+
+) or in pretty text using
+\series bold
+write_align
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-alignment"
+
+\end_inset
+
+).
+\end_layout
+
+\begin_layout Subsection
+How to create a weight matrix?
+\end_layout
+
+\begin_layout Standard
+If you want a weight matrix to show the sequence composition of a stack
+ of sequences you can use the biotool create_weight_matrix:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_weight_matrix
+\end_layout
+
+\begin_layout Standard
+The result can be output in percent using the
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+percent switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_weight_matrix --percent
+\end_layout
+
+\begin_layout Standard
+The weight matrix can be written as tabular output with
+\series bold
+write_tab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-tab"
+
+\end_inset
+
+) after removing the records containing SEQ with
+\series bold
+grab
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-grab"
+
+\end_inset
+
+):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | create_weight_matrix | grab --invert --keys=SEQ --keys_only
+\end_layout
+
+\begin_layout LyX-Code
+| write_tab --no_stream
+\end_layout
+
+\begin_layout Standard
+The V0 column will hold the residue, while the rest of the columns will
+ hold the frequencies for each sequence position.
+\end_layout
+
+\begin_layout Section
+Plotting
+\end_layout
+
+\begin_layout Standard
+There exist several biotools for plotting.
+ Some of these are based on GNUplot
+\begin_inset Foot
+status open
+
+\begin_layout Standard
+\begin_inset LatexCommand url
+target "http://www.gnuplot.info/"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+, which is an extremely powerful platform to generate all sorts of plots
+ and even though GNUplot has quite a steep learning curve, the biotools
+ utilizing GNUplot are simple to use.
+ GNUplot is able to output a lot of different formats (called terminals
+ in GNUplot), but the biotools focus on three formats only:
+\end_layout
+
+\begin_layout Enumerate
+The 'dumb' terminal is the default for the GNUplot based biotools and will output
+ a plot in crude ASCII text (Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Dumb-terminal"
+
+\end_inset
+
+).
+ This is quite nice for a quick and dirty plot to get an overview of your
+ data.
+\end_layout
+
+\begin_layout Enumerate
+The 'post' or 'postscript' terminal outputs postscript code which is publication
+ grade graphics that can be viewed with applications such as Ghostview,
+ Photoshop, and Preview.
+\end_layout
+
+\begin_layout Enumerate
+The 'svg' terminal outputs scalable vector graphics (SVG) which is a vector
+ based format.
+ SVG is great because you can edit the resulting plot using Photoshop or
+ Inkscape
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+Inkscape is a really handy drawing program that is free and open source.
+ Available at
+\begin_inset LatexCommand htmlurl
+target "http://www.inkscape.org"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+ if you want to add additional labels, captions, arrows, and so on and then
+ save the result in different formats, such as postscript without losing
+ resolution.
+\end_layout
+
+\begin_layout Standard
+The biotools for plotting that are not based on GNUplot only output SVG
+ (that may change in the future).
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename lendist_ascii.png
+ lyxscale 70
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Dumb-terminal"
+
+\end_inset
+
+Dumb terminal
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+The output of a length distribution plot in the default 'dumb terminal'
+ to the terminal window.
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a histogram?
+\begin_inset LatexCommand label
+name "How-to-plot-histogram"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+A generic histogram for a given value can be plotted with the biotool
+\series bold
+plot_histogram
+\series default
+ (Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Histogram"
+
+\end_inset
+
+):
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_histogram --key=TISSUE --no_stream
+\end_layout
+
+\begin_layout Standard
+(Figure missing)
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align left
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename histogram.png
+ lyxscale 70
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Histogram"
+
+\end_inset
+
+Histogram
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a length distribution?
+\begin_inset LatexCommand label
+name "sub:How-to-plot-lendist"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Plotting of length distributions, whether sequence lengths, pattern lengths,
+ hit lengths,
+\emph on
+etc.
+
+\emph default
+ is a really handy thing and can be done with the biotool
+\series bold
+plot_lendist
+\series default
+.
+ If you have a file with FASTA entries and want to plot the length distribution
+ you do it like this:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | length_seq
+\end_layout
+
+\begin_layout LyX-Code
+| plot_lendist --key=SEQ_LEN --no_stream
+\end_layout
+
+\begin_layout Standard
+The result will be written to the default dumb terminal and will look like
+ Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Dumb-terminal"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+If you instead want the result in postscript format you can do:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_lendist --key=SEQ_LEN --terminal=post --result_out=file.ps
+\end_layout
+
+\begin_layout Standard
+That will generate the plot and save it to file, but not interrupt the data
+ stream which can then be used in further analysis.
+ You can also save the plot implicitly using '>', however, it is then important
+ to terminate the stream with the
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream switch:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_lendist --key=SEQ_LEN --terminal=post --no_stream > file.ps
+\end_layout
+
+\begin_layout Standard
+The resulting plot can be seen in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Length-distribution"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename lendist.ps
+ lyxscale 50
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Length-distribution"
+
+\end_inset
+
+Length distribution
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+Length distribution of 630 piRNA like RNAs.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a chromosome distribution?
+\begin_inset LatexCommand label
+name "sub:How-to-plot-chrdist"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+If you have the result of a sequence search against a multi chromosome genome,
+ it is very practical to be able to plot the distribution of search hits
+ on the different chromosomes.
+ This can be done with
+\series bold
+plot_chrdist
+\series default
+:
+\end_layout
+
+\begin_layout LyX-Code
+read_fasta --data_in=<file> | blat_genome | plot_chrdist --no_stream
+\end_layout
+
+\begin_layout Standard
+The above example will result in a crude plot using the 'dumb' terminal,
+ and if you want to mess around with the results from the BLAT search you
+ probably want to save the result to file first (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-write-PSL"
+
+\end_inset
+
+).
+ To plot the chromosome distribution from the saved search result you can
+ do:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=file.bed | plot_chrdist --terminal=post --result_out=plot.ps
+\end_layout
+
+\begin_layout Standard
+That will result in the output shown in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Chromosome-distribution"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename chrdist.ps
+ lyxscale 50
+ width 12cm
+ rotateAngle 90
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Chromosome-distribution"
+
+\end_inset
+
+Chromosome distribution
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to generate a dotplot?
+\begin_inset LatexCommand label
+name "sub:How-to-generate-dotplot"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+A dotplot is a powerful way to get an overview of the size and location
+ of sequence insertions, deletions, and duplications between two sequences.
+ Generating a dotplot with biotools is a two step process where you initially
+ find all matches between two sequences using the tool
+\series bold
+match_seq
+\series default
+ (see\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "sub:How-to-find-matches"
+
+\end_inset
+
+) and plot the resulting matches with
+\series bold
+plot_matches
+\series default
+.
+ Matching and plotting two
+\emph on
+Helicobacter pylori
+\emph default
+ genomes (1.7Mbp) takes around 10 seconds:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | match_seq | plot_matches --terminal=post --result_out=plot.ps
+\end_layout
+
+\begin_layout Standard
+The resulting dotplot is in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Dotplot"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename dotplot.ps
+ lyxscale 50
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Dotplot"
+
+\end_inset
+
+Dotplot
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+Forward matches are displayed in green while reverse matches are displayed
+ in red.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a sequence logo?
+\end_layout
+
+\begin_layout Standard
+Sequence logos can be generated with
+\series bold
+plot_seqlogo
+\series default
+.
+ The sequence type is determined automagically and an entropy scale of 2
+ bits and 4 bits is used for nucleotide and peptide sequences, respectively
+\begin_inset Foot
+status collapsed
+
+\begin_layout Standard
+\begin_inset LatexCommand htmlurl
+target "http://www.ccrnp.ncifcrf.gov/~toms/paper/hawaii/latex/node5.html"
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_seqlogo --no_stream --result_out=seqlogo.svg
+\end_layout
+
+\begin_layout Standard
+An example of a sequence logo can be seen in Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Sequence-logo"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename seqlogo.png
+ lyxscale 50
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Sequence-logo"
+
+\end_inset
+
+Sequence logo
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsection
+How to plot a karyogram?
+\begin_inset LatexCommand label
+name "sub:How-to-plot-karyogram"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+To plot search hits on genomes use
+\series bold
+plot_karyogram
+\series default
+, which will output a nice karyogram in SVG graphics:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | plot_karyogram --result_out=karyogram.svg
+\end_layout
+
+\begin_layout Standard
+The banding data is taken from the UCSC genome browser database and currently
+ only Human and Mouse are supported.
+ Fig.\InsetSpace ~
+
+\begin_inset LatexCommand ref
+reference "fig:Karyogram"
+
+\end_inset
+
+ shows the distribution of piRNA like RNAs matched to the Human genome.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Standard
+\noindent
+\align center
+\begin_inset Graphics
+ filename karyogram.png
+ lyxscale 35
+ width 12cm
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+\begin_inset LatexCommand label
+name "fig:Karyogram"
+
+\end_inset
+
+Karyogram
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Quote
+Hits from a search of piRNA like RNAs in the Human genome are displayed as
+ short horizontal bars.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+Uploading Results
+\end_layout
+
+\begin_layout Subsection
+How do I display my results in the UCSC Genome Browser?
+\end_layout
+
+\begin_layout Standard
+Results from the list of biotools below can be uploaded directly to a local
+ mirror of the UCSC Genome Browser using the biotool
+\series bold
+upload_to_ucsc
+\series default
+:
+\end_layout
+
+\begin_layout Itemize
+patscan_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-patscan"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+blat_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAT"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+blast_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-BLAST"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+vmatch_seq
+\begin_inset LatexCommand eqref
+reference "sub:How-to-use-Vmatch"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The simplest syntax for uploading data requires two mandatory
+ switches:
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+database, which is the UCSC database name (such as hg18, mm9, etc.) and
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+table, which should be the user's initials followed by an underscore and a
+ short description of the data:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | upload_to_ucsc --database=hg18 --table=mah_snoRNAs
+\end_layout
+
+\begin_layout Standard
+The
+\series bold
+upload_to_ucsc
+\series default
+ biotool modifies the user's ~/ucsc/my_tracks.ra file automagically (a backup
+ is created with the name ~/ucsc/my_tracks.ra~) with default values that
+ can be overridden using the following switches:
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+short_label - Short label for track - Default=database->table
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+long_label - Long label for track - Default=database->table
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+group - Track group name - Default=<user name as defined in env>
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+priority - Track display priority - Default=1
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+color - Track color - Default=147,73,42
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+chunk_size - Chunks for loading - Default=10000000
+\end_layout
+
+\begin_layout Itemize
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+visibility - Track visibility - Default=pack
+\end_layout
+
+\begin_layout Standard
+Also, data in BED or PSL format can be uploaded with
+\series bold
+upload_to_ucsc
+\series default
+ as long as these refer to genomes and chromosomes existing in the UCSC
+ Genome Browser:
+\end_layout
+
+\begin_layout LyX-Code
+read_bed --data_in=<bed file> | upload_to_ucsc ...
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+read_psl --data_in=<psl file> | upload_to_ucsc ...
+\end_layout
+
+\begin_layout Section
+Power Scripting
+\end_layout
+
+\begin_layout Standard
+It is possible to do commandline scripting of biotool records using Perl.
+ Because a biotool record essentially is a hash structure, you can pass
+ records to the
+\series bold
+bioscript
+\series default
+ command, which is a wrapper around the Perl executable that allows direct
+ manipulations of the records using the power of Perl.
+\end_layout
+
+\begin_layout Standard
+In the example below we replace, in all records, the value of the CHR key
+ with a consecutive number:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | bioscript 'while($r=get_record(
+\backslash
+*STDIN)){$r->{CHR}=$i++; put_record($r)}'
+\end_layout
+
+\begin_layout Standard
+Something more useful would probably be to create custom FASTA headers.
+ E.g.
+ if we read in a BED file, lookup the genomic sequence, create a custom
+ FASTA header with
+\series bold
+bioscript
+\series default
+ and output FASTA entries:
+\end_layout
+
+\begin_layout LyX-Code
+...
+ | bioscript 'while($r=get_record(
+\backslash
+*STDIN)){$r->{SEQ_NAME}= //
+\end_layout
+
+\begin_layout LyX-Code
+join("_",$r->{CHR},$r->{CHR_BEG},$r->{CHR_END}); put_record($r)}'
+\end_layout
+
+\begin_layout Standard
+And the output:
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_21567527_21567550
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_693380_693403
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_13859534_13859557
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_9005090_9005113
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_2106825_2106848
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout LyX-Code
+>chr2L_14649031_14649054
+\end_layout
+
+\begin_layout LyX-Code
+taccaaacggatgcctcagacatc
+\end_layout
+
+\begin_layout Section
+Troubleshooting
+\end_layout
+
+\begin_layout Standard
+Shoot the messenger!
+\end_layout
+
+\begin_layout Section
+\start_of_appendix
+Keys
+\begin_inset LatexCommand label
+name "sec:Keys"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+HIT
+\end_layout
+
+\begin_layout Standard
+HIT_BEG
+\end_layout
+
+\begin_layout Standard
+HIT_END
+\end_layout
+
+\begin_layout Standard
+HIT_LEN
+\end_layout
+
+\begin_layout Standard
+HIT_NAME
+\end_layout
+
+\begin_layout Standard
+PATTERN
+\end_layout
+
+\begin_layout Section
+Switches
+\begin_inset LatexCommand label
+name "sec:Switches"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_in
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+stream_out
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+no_stream
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+data_in
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+result_out
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status collapsed
+
+\begin_layout Standard
+
+-
+\backslash
+/-
+\end_layout
+
+\end_inset
+
+num
+\end_layout
+
+\begin_layout Section
+scan_for_matches README
+\begin_inset LatexCommand label
+name "sec:scan_for_matches-README"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches:
+\end_layout
+
+\begin_layout LyX-Code
+ A Program to Scan Nucleotide or Protein Sequences for Matching Patterns
+\end_layout
+
+\begin_layout LyX-Code
+ Ross Overbeek
+\end_layout
+
+\begin_layout LyX-Code
+ MCS
+\end_layout
+
+\begin_layout LyX-Code
+ Argonne National Laboratory
+\end_layout
+
+\begin_layout LyX-Code
+ Argonne, IL 60439
+\end_layout
+
+\begin_layout LyX-Code
+ USA
+\end_layout
+
+\begin_layout LyX-Code
+Scan_for_matches is a utility that we have written to search for
+\end_layout
+
+\begin_layout LyX-Code
+patterns in DNA and protein sequences.
+ I wrote most of the code,
+\end_layout
+
+\begin_layout LyX-Code
+although David Joerg and Morgan Price wrote sections of an
+\end_layout
+
+\begin_layout LyX-Code
+earlier version.
+ The whole notion of pattern matching has a rich
+\end_layout
+
+\begin_layout LyX-Code
+history, and we borrowed liberally from many sources.
+ However, it is
+\end_layout
+
+\begin_layout LyX-Code
+worth noting that we were strongly influenced by the elegant tools
+\end_layout
+
+\begin_layout LyX-Code
+developed and distributed by David Searls.
+ My intent is to make the
+\end_layout
+
+\begin_layout LyX-Code
+existing tool available to anyone in the research community that might
+\end_layout
+
+\begin_layout LyX-Code
+find it useful.
+ I will continue to try to fix bugs and make suggested
+\end_layout
+
+\begin_layout LyX-Code
+enhancements, at least until I feel that a superior tool exists.
+\end_layout
+
+\begin_layout LyX-Code
+Hence, I would appreciate it if all bug reports and suggestions are
+\end_layout
+
+\begin_layout LyX-Code
+directed to me at Overbeek@mcs.anl.gov.
+
+\end_layout
+
+\begin_layout LyX-Code
+I will try to log all bug fixes and report them to users that send me
+\end_layout
+
+\begin_layout LyX-Code
+their email addresses.
+ I do not require that you give me your name
+\end_layout
+
+\begin_layout LyX-Code
+and address.
+ However, if you do give it to me, I will try to notify
+\end_layout
+
+\begin_layout LyX-Code
+you of serious problems as they are discovered.
+\end_layout
+
+\begin_layout LyX-Code
+Getting Started:
+\end_layout
+
+\begin_layout LyX-Code
+ The distribution should contain at least the following programs:
+\end_layout
+
+\begin_layout LyX-Code
+ README - This document
+\end_layout
+
+\begin_layout LyX-Code
+ ggpunit.c - One of the two source files
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches.c - The second source file
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ run_tests - A perl script to test things
+\end_layout
+
+\begin_layout LyX-Code
+ show_hits - A handy perl script
+\end_layout
+
+\begin_layout LyX-Code
+ test_dna_input - Test sequences for DNA
+\end_layout
+
+\begin_layout LyX-Code
+ test_dna_patterns - Test patterns for DNA scan
+\end_layout
+
+\begin_layout LyX-Code
+ test_output - Desired output from test
+\end_layout
+
+\begin_layout LyX-Code
+ test_prot_input - Test protein sequences
+\end_layout
+
+\begin_layout LyX-Code
+ test_prot_patterns - Test patterns for proteins
+\end_layout
+
+\begin_layout LyX-Code
+ testit - a perl script used for test
+\end_layout
+
+\begin_layout LyX-Code
+ Only the first three files are required.
+ The others are useful,
+\end_layout
+
+\begin_layout LyX-Code
+ but only if you have Perl installed on your system.
+ If you do
+\end_layout
+
+\begin_layout LyX-Code
+ have Perl, I suggest that you type
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ which perl
+\end_layout
+
+\begin_layout LyX-Code
+ to find out where it installed.
+ On my system, I get the following
+\end_layout
+
+\begin_layout LyX-Code
+ response:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ clone% which perl
+\end_layout
+
+\begin_layout LyX-Code
+ /usr/local/bin/perl
+\end_layout
+
+\begin_layout LyX-Code
+ indicating that Perl is installed in /usr/local/bin.
+ Anyway, once
+\end_layout
+
+\begin_layout LyX-Code
+ you know where it is installed, edit the first line of files
+\end_layout
+
+\begin_layout LyX-Code
+ testit
+\end_layout
+
+\begin_layout LyX-Code
+ show_hits
+\end_layout
+
+\begin_layout LyX-Code
+ replacing /usr/local/bin/perl with the appropriate location.
+ I
+\end_layout
+
+\begin_layout LyX-Code
+ will assume that you can do this, although it is not critical (it
+\end_layout
+
+\begin_layout LyX-Code
+ is needed only to test the installation and to use the "show_hits"
+\end_layout
+
+\begin_layout LyX-Code
+ utility).
+ Perl is not required to actually install and run
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches.
+
+\end_layout
+
+\begin_layout LyX-Code
+ If you do not have Perl, I suggest you get it and install it (it
+\end_layout
+
+\begin_layout LyX-Code
+ is a wonderful utility).
+ Information about Perl and how to get it
+\end_layout
+
+\begin_layout LyX-Code
+ can be found in the book "Programming Perl" by Larry Wall and
+\end_layout
+
+\begin_layout LyX-Code
+ Randall L.
+ Schwartz, published by O'Reilly & Associates, Inc.
+\end_layout
+
+\begin_layout LyX-Code
+ To get started, you will need to compile the program.
+ I do this
+\end_layout
+
+\begin_layout LyX-Code
+ using
+\end_layout
+
+\begin_layout LyX-Code
+ gcc -O -o scan_for_matches ggpunit.c scan_for_matches.c
+\end_layout
+
+\begin_layout LyX-Code
+ If you do not use GNU C, use
+\end_layout
+
+\begin_layout LyX-Code
+ cc -O -DCC -o scan_for_matches ggpunit.c scan_for_matches.c
+\end_layout
+
+\begin_layout LyX-Code
+ which works on my Sun.
+
+\end_layout
+
+\begin_layout LyX-Code
+ Once you have compiled scan_for_matches, you can verify that it
+\end_layout
+
+\begin_layout LyX-Code
+ works with
+\end_layout
+
+\begin_layout LyX-Code
+ clone% run_tests tmp
+\end_layout
+
+\begin_layout LyX-Code
+ clone% diff tmp test_output
+\end_layout
+
+\begin_layout LyX-Code
+ You may get a few strange lines of the sort
+\end_layout
+
+\begin_layout LyX-Code
+ clone% run_tests tmp
+\end_layout
+
+\begin_layout LyX-Code
+ rm: tmp: No such file or directory
+\end_layout
+
+\begin_layout LyX-Code
+ clone% diff tmp test_output
+\end_layout
+
+\begin_layout LyX-Code
+ These should cause no concern.
+ However, if the "diff" shows that
+\end_layout
+
+\begin_layout LyX-Code
+ tmp and test_output are different, contact me (you have a
+\end_layout
+
+\begin_layout LyX-Code
+ problem).
+
+\end_layout
+
+\begin_layout LyX-Code
+ You should now be able to use scan_for_matches by following the
+\end_layout
+
+\begin_layout LyX-Code
+ instructions given below (which is all the normal user should have
+\end_layout
+
+\begin_layout LyX-Code
+ to understand, once things are installed properly).
+\end_layout
+
+\begin_layout LyX-Code
+ ==============================================================
+\end_layout
+
+\begin_layout LyX-Code
+How to run scan_for_matches:
+\end_layout
+
+\begin_layout LyX-Code
+ To run the program, you need to create two files
+\end_layout
+
+\begin_layout LyX-Code
+ 1.
+ the first file contains the pattern you wish to scan for; I'll
+\end_layout
+
+\begin_layout LyX-Code
+ call this file pat_file in what follows (but any name is ok)
+\end_layout
+
+\begin_layout LyX-Code
+ 2.
+ the second file contains a set of sequences to scan.
+ These
+\end_layout
+
+\begin_layout LyX-Code
+ should be in "fasta format".
+ Just look at the contents of
+\end_layout
+
+\begin_layout LyX-Code
+ test_dna_input to see examples of this format.
+ Basically,
+\end_layout
+
+\begin_layout LyX-Code
+ each sequence begins with a line of the form
+\end_layout
+
+\begin_layout LyX-Code
+ >sequence_id
+\end_layout
+
+\begin_layout LyX-Code
+ and is followed by one or more lines containing the sequence.
+\end_layout
+
+\begin_layout LyX-Code
+ Once these files have been created, you just use
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches pat_file < input_file
+\end_layout
+
+\begin_layout LyX-Code
+ to scan all of the input sequences for the given pattern.
+ As an
+\end_layout
+
+\begin_layout LyX-Code
+ example, suppose that pat_file contains a single line of the form
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...7 3...8 ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ Then,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ should produce two "hits".
+ When I run this on my machine, I get
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[6,27]
+\end_layout
+
+\begin_layout LyX-Code
+ cguaacc ggttaacc gguuacg
+\end_layout
+
+\begin_layout LyX-Code
+ >tst2:[6,27]
+\end_layout
+
+\begin_layout LyX-Code
+ CGUAACC GGTTAACC GGUUACG
+\end_layout
+
+\begin_layout LyX-Code
+ clone%
+\end_layout
+
+\begin_layout LyX-Code
+Simple Patterns Built by Matching Ranges and Reverse Complements
+\end_layout
+
+\begin_layout LyX-Code
+ Let me first explain this simple pattern:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...7 3...8 ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ The pattern consists of three "pattern units" separated by spaces.
+\end_layout
+
+\begin_layout LyX-Code
+ The first pattern unit is
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...7
+\end_layout
+
+\begin_layout LyX-Code
+ which means "match 4 to 7 characters and call them p1".
+ The
+\end_layout
+
+\begin_layout LyX-Code
+ second pattern unit is
+\end_layout
+
+\begin_layout LyX-Code
+ 3...8
+\end_layout
+
+\begin_layout LyX-Code
+ which means "then match 3 to 8 characters".
+ The last pattern unit
+\end_layout
+
+\begin_layout LyX-Code
+ is
+\end_layout
+
+\begin_layout LyX-Code
+ ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ which means "match the reverse complement of p1".
+ The first
+\end_layout
+
+\begin_layout LyX-Code
+ reported hit is shown as
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[6,27]
+\end_layout
+
+\begin_layout LyX-Code
+ cguaacc ggttaacc gguuacg
+\end_layout
+
+\begin_layout LyX-Code
+ which states that characters 6 through 27 of sequence tst1 were
+\end_layout
+
+\begin_layout LyX-Code
+ matched.
+ "cguaacc" matched the first pattern unit, "ggttaacc" the
+\end_layout
+
+\begin_layout LyX-Code
+ second, and "gguuacg" the third.
+ This is an example of a common
+\end_layout
+
+\begin_layout LyX-Code
+ type of pattern used to search for sections of DNA or RNA that
+\end_layout
+
+\begin_layout LyX-Code
+ would fold into a hairpin loop.
+\end_layout
+
+\begin_layout LyX-Code
+Searching Both Strands
+\end_layout
+
+\begin_layout LyX-Code
+ Now for a short aside: scan_for_matches only searched the
+\end_layout
+
+\begin_layout LyX-Code
+ sequences in the input file; it did not search the opposite
+\end_layout
+
+\begin_layout LyX-Code
+ strand.
+ With a pattern of the sort we just used, there is no
+\end_layout
+
+\begin_layout LyX-Code
+ need to search the opposite strand.
+ However, it is normally the
+\end_layout
+
+\begin_layout LyX-Code
+ case that you will wish to search both the sequence and the
+\end_layout
+
+\begin_layout LyX-Code
+ opposite strand (i.e., the reverse complement of the sequence).
+\end_layout
+
+\begin_layout LyX-Code
+ To do that, you would just use the "-c" command line option.
+ For example,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches -c pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ Hits on the opposite strand will show a beginning location greater
+\end_layout
+
+\begin_layout LyX-Code
+ than the end location of the match.
+\end_layout
+
+\begin_layout LyX-Code
+Defining Pairing Rules and Allowing Mismatches, Insertions, and Deletions
+\end_layout
+
+\begin_layout LyX-Code
+ Let us stop now and ask "What additional features would one need to
+\end_layout
+
+\begin_layout LyX-Code
+ really find the kinds of loop structures that characterize tRNAs,
+\end_layout
+
+\begin_layout LyX-Code
+ rRNAs, and so forth?" I can immediately think of two:
+\end_layout
+
+\begin_layout LyX-Code
+ a) you will need to be able to allow non-standard pairings
+\end_layout
+
+\begin_layout LyX-Code
+ (those other than G-C and A-U), and
+\end_layout
+
+\begin_layout LyX-Code
+ b) you will need to be able to tolerate some number of
+\end_layout
+
+\begin_layout LyX-Code
+ mismatches and bulges.
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ Let me first show you how to handle non-standard "rules for
+\end_layout
+
+\begin_layout LyX-Code
+ pairing in reverse complements".
+ Consider the following pattern,
+\end_layout
+
+\begin_layout LyX-Code
+ which I show as two lines (you may use as many lines as you like in
+\end_layout
+
+\begin_layout LyX-Code
+ forming a pattern, although you can only break a pattern at points
+\end_layout
+
+\begin_layout LyX-Code
+ where space would be legal):
+\end_layout
+
+\begin_layout LyX-Code
+ r1={au,ua,gc,cg,gu,ug,ga,ag}
+\end_layout
+
+\begin_layout LyX-Code
+ p1=2...3 0...4 p2=2...5 1...5 r1~p2 0...4 ~p1
+\end_layout
+
+\begin_layout LyX-Code
+ The first "pattern unit" does not actually match anything; rather,
+\end_layout
+
+\begin_layout LyX-Code
+ it defines a "pairing rule" in which standard pairings are
+\end_layout
+
+\begin_layout LyX-Code
+ allowed, as well as G-A and A-G (in case you wondered, Us and Ts
+\end_layout
+
+\begin_layout LyX-Code
+ and upper and lower case can be used interchangeably; for example
+\end_layout
+
+\begin_layout LyX-Code
+ r1={AT,UA,gc,cg} could be used to define the "standard rule" for
+\end_layout
+
+\begin_layout LyX-Code
+ pairings).
+ The second line consists of seven pattern units which
+\end_layout
+
+\begin_layout LyX-Code
+ may be interpreted as follows:
+\end_layout
+
+\begin_layout LyX-Code
+ p1=2...3 match 2 or 3 characters (call it p1)
+\end_layout
+
+\begin_layout LyX-Code
+ 0...4 match 0 to 4 characters
+\end_layout
+
+\begin_layout LyX-Code
+ p2=2...5 match 2 to 5 characters (call it p2)
+\end_layout
+
+\begin_layout LyX-Code
+ 1...5 match 1 to 5 characters
+\end_layout
+
+\begin_layout LyX-Code
+ r1~p2 match the reverse complement of p2,
+\end_layout
+
+\begin_layout LyX-Code
+ allowing G-A and A-G pairs
+\end_layout
+
+\begin_layout LyX-Code
+ 0...4 match 0 to 4 characters
+\end_layout
+
+\begin_layout LyX-Code
+ ~p1 match the reverse complement of p1
+\end_layout
+
+\begin_layout LyX-Code
+ allowing only G-C, C-G, A-T, and T-A pairs
+\end_layout
+
+\begin_layout LyX-Code
+ Thus, r1~p2 means "match the reverse complement of p2 using rule r1".
+\end_layout
+
+\begin_layout LyX-Code
+ Now let us consider the issue of tolerating mismatches and bulges.
+\end_layout
+
+\begin_layout LyX-Code
+ You may add a "qualifier" to the pattern unit that gives the
+\end_layout
+
+\begin_layout LyX-Code
+ tolerable number of "mismatches, deletions, and insertions".
+\end_layout
+
+\begin_layout LyX-Code
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=10...10 3...8 ~p1[1,2,1]
+\end_layout
+
+\begin_layout LyX-Code
+ means that the third pattern unit must match 10 characters,
+\end_layout
+
+\begin_layout LyX-Code
+ allowing one "mismatch" (a pairing other than G-C, C-G, A-T, or
+\end_layout
+
+\begin_layout LyX-Code
+ T-A), two deletions (a deletion is a character that occurs in p1,
+\end_layout
+
+\begin_layout LyX-Code
+ but has been "deleted" from the string matched by ~p1), and one
+\end_layout
+
+\begin_layout LyX-Code
+ insertion (an "insertion" is a character that occurs in the string
+\end_layout
+
+\begin_layout LyX-Code
+ matched by ~p1, but for which no corresponding character
+\end_layout
+
+\begin_layout LyX-Code
+ occurs in p1).
+ In this case, the pattern would match
+\end_layout
+
+\begin_layout LyX-Code
+ ACGTACGTAC GGGGGGGG GCGTTACCT
+\end_layout
+
+\begin_layout LyX-Code
+ which is, you must admit, a fairly weak loop.
+ It is common to
+\end_layout
+
+\begin_layout LyX-Code
+ allow mismatches, but you will find yourself using insertions and
+\end_layout
+
+\begin_layout LyX-Code
+ deletions much more rarely.
+ In any event, you should note that
+\end_layout
+
+\begin_layout LyX-Code
+ allowing mismatches, insertions, and deletions does force the
+\end_layout
+
+\begin_layout LyX-Code
+ program to try many additional possible pairings, so it does slow
+\end_layout
+
+\begin_layout LyX-Code
+ things down a bit.
+\end_layout
+
+\begin_layout LyX-Code
+How Patterns Are Matched
+\end_layout
+
+\begin_layout LyX-Code
+ Now is as good a time as any to discuss the basic flow of control
+\end_layout
+
+\begin_layout LyX-Code
+ when matching patterns.
+ Recall that a "pattern" is a sequence of
+\end_layout
+
+\begin_layout LyX-Code
+ "pattern units".
+ Suppose that the pattern units were
+\end_layout
+
+\begin_layout LyX-Code
+ u1 u2 u3 u4 ...
+ un
+\end_layout
+
+\begin_layout LyX-Code
+ The scan of a sequence S begins by setting the current position
+\end_layout
+
+\begin_layout LyX-Code
+ to 1.
+ Then, an attempt is made to match u1 starting at the
+\end_layout
+
+\begin_layout LyX-Code
+ current position.
+ Each attempt to match a pattern unit can
+\end_layout
+
+\begin_layout LyX-Code
+ succeed or fail.
+ If it succeeds, then an attempt is made to match
+\end_layout
+
+\begin_layout LyX-Code
+ the next unit.
+ If it fails, then an attempt is made to find an
+\end_layout
+
+\begin_layout LyX-Code
+ alternative match for the immediately preceding pattern unit.
+ If
+\end_layout
+
+\begin_layout LyX-Code
+ this succeeds, then we proceed forward again to the next unit.
+ If
+\end_layout
+
+\begin_layout LyX-Code
+ it fails we go back to the preceding unit.
+ This process is called
+\end_layout
+
+\begin_layout LyX-Code
+ "backtracking".
+ If there are no previous units, then the current
+\end_layout
+
+\begin_layout LyX-Code
+ position is incremented by one, and everything starts again.
+ This
+\end_layout
+
+\begin_layout LyX-Code
+ proceeds until either the current position goes past the end of
+\end_layout
+
+\begin_layout LyX-Code
+ the sequence or all of the pattern units succeed.
+ On success,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches reports the "hit", the current position is set
+\end_layout
+
+\begin_layout LyX-Code
+ just past the hit, and an attempt is made to find another hit.
+\end_layout
+
+\begin_layout LyX-Code
+ If you wish to limit the scan to simply finding a maximum of, say,
+\end_layout
+
+\begin_layout LyX-Code
+ 10 hits, you can use the -n option (-n 10 would set the limit to
+\end_layout
+
+\begin_layout LyX-Code
+ 10 reported hits).
+ For example,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches -c -n 1 pat_file < test_dna_input
+\end_layout
+
+\begin_layout LyX-Code
+ would search for just the first hit (and would stop searching the
+\end_layout
+
+\begin_layout LyX-Code
+ current sequences or any that follow in the input file).
+\end_layout
+
+\begin_layout LyX-Code
+Searching for repeats:
+\end_layout
+
+\begin_layout LyX-Code
+ In the last section, I discussed almost all of the details
+\end_layout
+
+\begin_layout LyX-Code
+ required to allow you to look for repeats.
+ Consider the following
+\end_layout
+
+\begin_layout LyX-Code
+ set of patterns:
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...6 3...8 p1 (find exact 6 character repeat separated
+\end_layout
+
+\begin_layout LyX-Code
+ by 3 to 8 characters)
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...6 3...8 p1[1,0,0] (allow one mismatch)
+\end_layout
+
+\begin_layout LyX-Code
+ p1=3...3 p1[1,0,0] p1[1,0,0] p1[1,0,0]
+\end_layout
+
+\begin_layout LyX-Code
+ (match 12 characters that are the remains
+\end_layout
+
+\begin_layout LyX-Code
+ of a 3-character sequence occurring 4 times)
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ p1=4...8 0...3 p2=6...8 p1 0...3 p2
+\end_layout
+
+\begin_layout LyX-Code
+ (This would match things like
+\end_layout
+
+\begin_layout LyX-Code
+ ATCT G TCTTT ATCT TG TCTTT
+\end_layout
+
+\begin_layout LyX-Code
+ )
+\end_layout
+
+\begin_layout LyX-Code
+Searching for particular sequences:
+\end_layout
+
+\begin_layout LyX-Code
+ Occasionally, one wishes to match a specific, known sequence.
+\end_layout
+
+\begin_layout LyX-Code
+ In such a case, you can just give the sequence (along with an
+\end_layout
+
+\begin_layout LyX-Code
+ optional statement of the allowable mismatches, insertions, and
+\end_layout
+
+\begin_layout LyX-Code
+ deletions).
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...8 GAGA ~p1 (match a hairpin with GAGA as the loop)
+\end_layout
+
+\begin_layout LyX-Code
+ RRRRYYYY (match 4 purines followed by 4 pyrimidines)
+\end_layout
+
+\begin_layout LyX-Code
+ TATAA[1,0,0] (match TATAA, allowing 1 mismatch)
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+Matches against a "weight matrix":
+\end_layout
+
+\begin_layout LyX-Code
+ I will conclude my examples of the types of pattern units
+\end_layout
+
+\begin_layout LyX-Code
+ available for matching against nucleotide sequences by discussing a
+\end_layout
+
+\begin_layout LyX-Code
+ crude implementation of matching using a "weight matrix".
+ While I
+\end_layout
+
+\begin_layout LyX-Code
+ am less than overwhelmed with the syntax that I chose, I think that
+\end_layout
+
+\begin_layout LyX-Code
+ the reader should be aware that I was thinking of generating
+\end_layout
+
+\begin_layout LyX-Code
+ patterns containing such pattern units automatically from
+\end_layout
+
+\begin_layout LyX-Code
+ alignments (and did not really plan on typing such things in by
+\end_layout
+
+\begin_layout LyX-Code
+ hand very often).
+ Anyway, suppose that you wanted to match a
+\end_layout
+
+\begin_layout LyX-Code
+ sequence of eight characters.
+ The "consensus" of these eight
+\end_layout
+
+\begin_layout LyX-Code
+ characters is GRCACCGS, but the actual "frequencies of occurrence"
+\end_layout
+
+\begin_layout LyX-Code
+ are given in the matrix below.
+ Thus, the first character is an A
+\end_layout
+
+\begin_layout LyX-Code
+ 16% of the time and a G 84% of the time.
+ The second is an A 57% of
+\end_layout
+
+\begin_layout LyX-Code
+ the time, a C 10% of the time, a G 29% of the time, and a T 4% of
+\end_layout
+
+\begin_layout LyX-Code
+ the time.
+
+\end_layout
+
+\begin_layout LyX-Code
+ C1 C2 C3 C4 C5 C6 C7 C8
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ A 16 57 0 95 0 18 0 0
+\end_layout
+
+\begin_layout LyX-Code
+ C 0 10 80 0 100 60 0 50
+\end_layout
+
+\begin_layout LyX-Code
+ G 84 29 0 0 0 20 100 50
+\end_layout
+
+\begin_layout LyX-Code
+ T 0 4 20 5 0 2 0 0
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ One could use the following pattern unit to search for inexact
+\end_layout
+
+\begin_layout LyX-Code
+ matches related to such a "weight matrix":
+\end_layout
+
+\begin_layout LyX-Code
+ {(16,0,84,0),(57,10,29,4),(0,80,0,20),(95,0,0,5),
+\end_layout
+
+\begin_layout LyX-Code
+ (0,100,0,0),(18,60,20,2),(0,0,100,0),(0,50,50,0)} > 450
+\end_layout
+
+\begin_layout LyX-Code
+ This pattern unit will attempt to match exactly eight characters.
+\end_layout
+
+\begin_layout LyX-Code
+ For each character in the sequence, the entry in the corresponding
+\end_layout
+
+\begin_layout LyX-Code
+ tuple is added to an accumulated sum.
+ If the sum is greater than
+\end_layout
+
+\begin_layout LyX-Code
+ 450, the match succeeds; else it fails.
+\end_layout
+
+\begin_layout LyX-Code
+ Recently, this feature was upgraded to allow ranges.
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ 600 > {(16,0,84,0),(57,10,29,4),(0,80,0,20),(95,0,0,5),
+\end_layout
+
+\begin_layout LyX-Code
+ (0,100,0,0),(18,60,20,2),(0,0,100,0),(0,50,50,0)} > 450
+\end_layout
+
+\begin_layout LyX-Code
+ will work, as well.
+\end_layout
+
+\begin_layout LyX-Code
+Allowing Alternatives:
+\end_layout
+
+\begin_layout LyX-Code
+ Very occasionally, you may wish to allow alternative pattern units
+\end_layout
+
+\begin_layout LyX-Code
+ (i.e., "match either A or B").
+ You can do this using something
+\end_layout
+
+\begin_layout LyX-Code
+ like
+\end_layout
+
+\begin_layout LyX-Code
+ ( GAGA | GCGCA)
+\end_layout
+
+\begin_layout LyX-Code
+ which says "match either GAGA or GCGCA".
+ You may take
+\end_layout
+
+\begin_layout LyX-Code
+ alternatives of a list of pattern units, for example
+\end_layout
+
+\begin_layout LyX-Code
+ (p1=3...3 3...8 ~p1 | p1=5...5 4...4 ~p1 GGG)
+\end_layout
+
+\begin_layout LyX-Code
+ would match one of two sequences of pattern units.
+ There is one
+\end_layout
+
+\begin_layout LyX-Code
+ clumsy aspect of the syntax: to match a list of alternatives, you
+\end_layout
+
+\begin_layout LyX-Code
+ need to fully parenthesize the request.
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ (GAGA | (GCGCA | TTCGA))
+\end_layout
+
+\begin_layout LyX-Code
+ would be needed to try the three alternatives.
+\end_layout
+
+\begin_layout LyX-Code
+One Minor Extension
+\end_layout
+
+\begin_layout LyX-Code
+ Sometimes a pattern will contain a sequence of distinct ranges,
+\end_layout
+
+\begin_layout LyX-Code
+ and you might wish to limit the sum of the lengths of the matched
+\end_layout
+
+\begin_layout LyX-Code
+ subsequences.
+ For example, suppose that you basically wanted to
+\end_layout
+
+\begin_layout LyX-Code
+ match something like
+\end_layout
+
+\begin_layout LyX-Code
+ ARRYYTT p1=0...5 GCA[1,0,0] p2=1...6 ~p1 4...8 ~p2 p3=4...10 CCT
+\end_layout
+
+\begin_layout LyX-Code
+ but that the sum of the lengths of p1, p2, and p3 must not exceed
+\end_layout
+
+\begin_layout LyX-Code
+ eight characters.
+ To do this, you could add
+\end_layout
+
+\begin_layout LyX-Code
+ length(p1+p2+p3) < 9
+\end_layout
+
+\begin_layout LyX-Code
+ as the last pattern unit.
+ It will just succeed or fail (but does
+\end_layout
+
+\begin_layout LyX-Code
+ not actually match any characters in the sequence).
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+Matching Protein Sequences
+\end_layout
+
+\begin_layout LyX-Code
+ Suppose that the input file contains protein sequences.
+ In this
+\end_layout
+
+\begin_layout LyX-Code
+ case, you must invoke scan_for_matches with the "-p" option.
+ You
+\end_layout
+
+\begin_layout LyX-Code
+ cannot use aspects of the language that relate directly to
+\end_layout
+
+\begin_layout LyX-Code
+ nucleotide sequences (e.g., the -c command line option or pattern
+\end_layout
+
+\begin_layout LyX-Code
+ constructs referring to the reverse complement of a previously
+\end_layout
+
+\begin_layout LyX-Code
+ matched unit).
+
+\end_layout
+
+\begin_layout LyX-Code
+ You also have two additional constructs that allow you to match
+\end_layout
+
+\begin_layout LyX-Code
+ either "one of a set of amino acids" or "any amino acid other than
+\end_layout
+
+\begin_layout LyX-Code
+ those in a given set".
+ For example,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=0...4 any(HQD) 1...3 notany(HK) p1
+\end_layout
+
+\begin_layout LyX-Code
+ would successfully match a string like
+\end_layout
+
+\begin_layout LyX-Code
+ YWV D AA C YWV
+\end_layout
+
+\begin_layout LyX-Code
+Using the show_hits Utility
+\end_layout
+
+\begin_layout LyX-Code
+ When viewing a large set of complex matches, you might find it
+\end_layout
+
+\begin_layout LyX-Code
+ convenient to post-process the scan_for_matches output to get a
+\end_layout
+
+\begin_layout LyX-Code
+ more readable version.
+ We provide a simple post-processor called
+\end_layout
+
+\begin_layout LyX-Code
+ "show_hits".
+ To see its effect, just pipe the output of a
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches into show_hits:
+\end_layout
+
+\begin_layout LyX-Code
+ Normal Output:
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches -c pat_file < tmp
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[1,28]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacguaacc ggttaac cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ >tst1:[28,1]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacgtaacc ggttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ >tst2:[2,31]
+\end_layout
+
+\begin_layout LyX-Code
+ CGTACGUAAC C GGTTAACC GGUUACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ >tst2:[31,2]
+\end_layout
+
+\begin_layout LyX-Code
+ CGTACGTAAC C GGTTAACC GGTTACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ >tst3:[3,32]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacguaacc g gttaactt cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ >tst3:[32,3]
+\end_layout
+
+\begin_layout LyX-Code
+ gtacgtaacc g aagttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ Piped Through show_hits:
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches -c pat_file < tmp | show_hits
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[1,28]: gtacguaacc ggttaac cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[28,1]: gtacgtaacc ggttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[2,31]: CGTACGUAAC C GGTTAACC GGUUACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[31,2]: CGTACGTAAC C GGTTAACC GGTTACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[3,32]: gtacguaacc g gttaactt cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[32,3]: gtacgtaacc g aagttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ clone%
+\end_layout
+
+\begin_layout LyX-Code
+ Optionally, you can specify which of the "fields" in the matches
+\end_layout
+
+\begin_layout LyX-Code
+ you wish to sort on, and show_hits will sort them.
+ The field
+\end_layout
+
+\begin_layout LyX-Code
+ numbers start with 0.
+ So, you might get something like
+\end_layout
+
+\begin_layout LyX-Code
+ clone% scan_for_matches -c pat_file < tmp | show_hits 2 1
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[2,31]: CGTACGUAAC C GGTTAACC GGUUACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst2:[31,2]: CGTACGTAAC C GGTTAACC GGTTACGTACG
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[32,3]: gtacgtaacc g aagttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[1,28]: gtacguaacc ggttaac cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst1:[28,1]: gtacgtaacc ggttaac cggttacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ tst3:[3,32]: gtacguaacc g gttaactt cgguuacgtac
+\end_layout
+
+\begin_layout LyX-Code
+ clone%
+\end_layout
+
+\begin_layout LyX-Code
+ In this case, the hits have been sorted on fields 2 and 1 (that is,
+\end_layout
+
+\begin_layout LyX-Code
+ the third and second matched subfields).
+\end_layout
+
+\begin_layout LyX-Code
+ show_hits is just one possible little post-processor, and you
+\end_layout
+
+\begin_layout LyX-Code
+ might well wish to write a customized one for yourself.
+\end_layout
+
+\begin_layout LyX-Code
+Reducing the Cost of a Search
+\end_layout
+
+\begin_layout LyX-Code
+ The scan_for_matches utility uses a fairly simple search, and may
+\end_layout
+
+\begin_layout LyX-Code
+ consume large amounts of CPU time for complex patterns.
+ Someday,
+\end_layout
+
+\begin_layout LyX-Code
+ I may decide to optimize the code.
+ However, until then, let me
+\end_layout
+
+\begin_layout LyX-Code
+ mention one useful technique.
+
+\end_layout
+
+\begin_layout LyX-Code
+ When you have a complex pattern that includes a number of varying
+\end_layout
+
+\begin_layout LyX-Code
+ ranges, imprecise matches, and so forth, it is useful to
+\end_layout
+
+\begin_layout LyX-Code
+ "pipeline" matches.
+ That is, form a simpler pattern that can be
+\end_layout
+
+\begin_layout LyX-Code
+ used to scan through a large database extracting sections that
+\end_layout
+
+\begin_layout LyX-Code
+ might be matched by the more complex pattern.
+ Let me illustrate
+\end_layout
+
+\begin_layout LyX-Code
+ with a short example.
+ Suppose that you really wished to match the
+\end_layout
+
+\begin_layout LyX-Code
+ pattern
+\end_layout
+
+\begin_layout LyX-Code
+ p1=3...5 0...8 ~p1[1,1,0] p2=6...7 3...6 AGC 3...5 RYGC ~p2[1,0,0]
+\end_layout
+
+\begin_layout LyX-Code
+ In this case, the pattern units AGC 3...5 RYGC can be used to rapidly
+\end_layout
+
+\begin_layout LyX-Code
+ constrain the overall search.
+ You can preprocess the overall
+\end_layout
+
+\begin_layout LyX-Code
+ database using the pattern:
+\end_layout
+
+\begin_layout LyX-Code
+ 31...31 AGC 3...5 RYGC 7...7
+\end_layout
+
+\begin_layout LyX-Code
+ Put the complex pattern in pat_file1 and the simpler pattern in
+\end_layout
+
+\begin_layout LyX-Code
+ pat_file2.
+ Then use,
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches -c pat_file2 < nucleotide_database |
+\end_layout
+
+\begin_layout LyX-Code
+ scan_for_matches pat_file1
+\end_layout
+
+\begin_layout LyX-Code
+ The output will show things like
+\end_layout
+
+\begin_layout LyX-Code
+ >seqid:[232,280][2,47]
+\end_layout
+
+\begin_layout LyX-Code
+ matches pieces
+\end_layout
+
+\begin_layout LyX-Code
+ Then, the actual section of the sequence that was matched can be
+\end_layout
+
+\begin_layout LyX-Code
+ easily computed as [233,278] (remember, the positions start from
+\end_layout
+
+\begin_layout LyX-Code
+ 1, not 0).
+\end_layout
+
+\begin_layout LyX-Code
+ Let me finally add, you should do a few short experiments to see
+\end_layout
+
+\begin_layout LyX-Code
+ whether or not such pipelining actually improves performance -- it
+\end_layout
+
+\begin_layout LyX-Code
+ is not always obvious where the time is going, and I have
+\end_layout
+
+\begin_layout LyX-Code
+ sometimes found that the added complexity of pipelining actually
+\end_layout
+
+\begin_layout LyX-Code
+ slowed things up.
+ It gets its best improvements when there are
+\end_layout
+
+\begin_layout LyX-Code
+ exact matches of more than just a few characters that can be
+\end_layout
+
+\begin_layout LyX-Code
+ rapidly used to eliminate large sections of the database.
+\end_layout
+
+\begin_layout LyX-Code
+=============
+\end_layout
+
+\begin_layout LyX-Code
+Additions:
+\end_layout
+
+\begin_layout LyX-Code
+Feb 9, 1995: the pattern units ^ and $ now work as in normal regular
+\end_layout
+
+\begin_layout LyX-Code
+ expressions.
+ That is
+\end_layout
+
+\begin_layout LyX-Code
+ TTF $
+\end_layout
+
+\begin_layout LyX-Code
+ matches only TTF at the end of the string and
+\end_layout
+
+\begin_layout LyX-Code
+ ^ TTF
+\end_layout
+
+\begin_layout LyX-Code
+ matches only an initial TTF
+\end_layout
+
+\begin_layout LyX-Code
+ The pattern unit
+\end_layout
+
+\begin_layout LyX-Code
+ <p1
+\end_layout
+
+\begin_layout LyX-Code
+ matches the reverse of the string named p1.
+ That is,
+\end_layout
+
+\begin_layout LyX-Code
+ if p1 matched GCAT, then <p1 would match TACG.
+ Thus,
+\end_layout
+
+\begin_layout LyX-Code
+ p1=6...6 <p1
+\end_layout
+
+\begin_layout LyX-Code
+ matches a real palindrome (not the biologically common
+\end_layout
+
+\begin_layout LyX-Code
+ meaning of "reverse complement")
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\end_body
+\end_document
--- /dev/null
+%% LyX 1.5.1 created this file. For more info, see http://www.lyx.org/.
+%% Do not edit unless you really know what you are doing.
+\documentclass[english]{scrartcl}
+\usepackage[T1]{fontenc}
+\usepackage[latin9]{inputenc}
+\setlength{\parskip}{\medskipamount}
+\setlength{\parindent}{0pt}
+\usepackage{amsmath}
+\usepackage{graphicx}
+\IfFileExists{url.sty}{\usepackage{url}}
+ {\newcommand{\url}{\texttt}}
+
+\makeatletter
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
+%% Because html converters don't know tabularnewline
+\providecommand{\tabularnewline}{\\}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Textclass specific LaTeX commands.
+\newenvironment{lyxcode}
+{\begin{list}{}{
+\setlength{\rightmargin}{\leftmargin}
+\setlength{\listparindent}{0pt}% needed for AMS classes
+\raggedright
+\setlength{\itemsep}{0pt}
+\setlength{\parsep}{0pt}
+\normalfont\ttfamily}%
+ \item[]}
+{\end{list}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
+\usepackage[colorlinks=true, urlcolor=blue, linkcolor=black]{hyperref}
+
+\usepackage{babel}
+\makeatother
+
+\begin{document}
+
+\title{Biotools Cookbook}
+
+
+\author{Martin Asser Hansen}
+
+
+\publishers{John Mattick Group\\
+Institute for Molecular Bioscience\\
+University of Queensland\\
+Australia\\
+E-mail: mail@maasha.dk}
+
+\maketitle
+\thispagestyle{empty}
+
+\newpage{}
+
+\tableofcontents{}
+
+\listoffigures
+
+
+\newpage{}
+
+
+\section{Introduction}
+
+Biotools is a selection of simple tools that can be linked together
+(piped as we shall call it) in a very flexible manner to perform both
+simple and complex tasks. The fundamental idea is that biotools work
+on a data stream that will only terminate at the end of an analysis
+and that this data stream can be passed through several different
+biotools, each performing one specific task. The advantage of this
+approach is that a user can perform simple and complex tasks without
+having to write advanced code. Moreover, since the data format used
+to pass data between biotools is text based, biotools can be written
+by different developers in their favorite programming language ---
+and still the biotools will be able to work together.
+
+In the most simple form biotools can be piped together on the command
+line like this (using the pipe character '|'):
+
+\begin{lyxcode}
+read\_data~|~calculate\_something~|~write\_result
+\end{lyxcode}
+However, a more comprehensive analysis could be composed:
+
+\begin{lyxcode}
+read\_data~|~select\_entries~|~convert\_entries~|~search\_database~|
+
+evaluate\_results~|~plot\_diagram~|~plot\_another\_diagram~|
+
+load\_to\_database
+\end{lyxcode}
+The data stream that is piped through the biotools consists of records
+of key/value pairs in the same way a hash does in order to keep as
+simple a structure as possible. An example record can be seen below:
+
+\begin{lyxcode}
+
+
+REC\_TYPE:~PATSCAN
+
+MATCH:~AGATCAAGTG
+
+S\_BEG:~7
+
+S\_END:~16
+
+ALIGN\_LEN:~9
+
+S\_ID:~piR-t6
+
+STRAND:~+
+
+PATTERN:~AGATCAAGTG
+
+-{}-{}-
+\end{lyxcode}
+The '-\/-\/-' denotes the delimiter of the records, and each key
+is a word followed by a ':' and a white-space and then the value.
+By convention the biotools only use upper case keys (a list of used
+keys can be seen in Appendix~\ref{sec:Keys}). Since the records
+basically are hash structures this means that the keys in the stream
+are unordered, and in the above example it is pure coincidence
+that S\_BEG is displayed before S\_END; however, when the order
+of the keys is important, the biotools will automagically see to that.
+
+All of the biotools are able to read and write a data stream to and
+from file as long as the records are in the biotools format. This
+means that if you are undertaking a lengthy analysis where one of
+the steps is time consuming, you may save the stream after this step,
+and subsequently start one or more analyses from that last step%
+\footnote{It is a goal that the biotools at some point will be able to dump
+the data stream to file in case one of the tools fail critically.%
+}. If you are running a lengthy analysis it is highly recommended that
+you create a small test sample of the data and run that through the
+pipeline --- and once you are satisfied with the result proceed with
+the full data set (see~\ref{sub:How-to-select-a-few-records}).
+
+
+\section{The Data Stream}
+
+
+\subsection{How to read the data stream from file?\label{sub:How-to-read-stream}}
+
+You want to read a data stream that you previously have saved to file
+in biotools format. This can be done implicitly or explicitly. The
+implicit way uses the 'stdout' stream of the Unix terminal:
+
+\begin{lyxcode}
+cat~<file>~|~<biotool>
+\end{lyxcode}
+cat is the Unix command that reads a file and outputs the result to
+'stdout' --- which in this case is piped to any biotool represented
+by the <biotool>. It is also possible to read the data stream using
+'<' to direct the file into the 'stdin' stream of the biotool like this:
+
+\begin{lyxcode}
+<biotool>~<~<file>
+\end{lyxcode}
+However, that will not work if you pipe more biotools together. Then
+it is much safer to read the stream from a file explicitly like this:
+
+\begin{lyxcode}
+<biotool>~-{}-stream\_in=<file>
+\end{lyxcode}
+Here the filename <file> is explicitly given to the biotool <biotool>
+with the switch -\/-stream\_in. This switch works with all biotools.
+It is also possible to read in data from multiple sources by repeating
+the explicit read step:
+
+\begin{lyxcode}
+<biotool>~-{}-stream\_in=<file1>~|~<biotool>~-{}-stream\_in=<file2>
+\end{lyxcode}
+
+\subsection{How to write the data stream to file?\label{sub:How-to-write-stream}}
+
+In order to save the output stream from a biotool to file, so you
+can read in the stream again at a later time, you can do one of two
+things:
+
+\begin{lyxcode}
+<biotool>~>~<file>
+\end{lyxcode}
+All the biotools write the data stream to 'stdout' by default which
+can be written to a file by redirecting 'stdout' to file using '>',
+however, if one of the biotools for writing other formats is used
+then both the biotools records as well as the result output will
+go to 'stdout' in a mixture causing havoc! To avoid this you must
+use the switch -\/-stream\_out that explicitly tells the biotool to
+write the output stream to file:
+
+\begin{lyxcode}
+<biotool>~-{}-stream\_out=<file>
+\end{lyxcode}
+The -\/-stream\_out switch works with all biotools.
+
+
+\subsection{How to terminate the data stream?}
+
+The data stream never stops unless the user saves the stream to file
+or supplies the -\/-no\_stream switch that will terminate the
+stream:
+
+\begin{lyxcode}
+<biotool>~-{}-no\_stream
+\end{lyxcode}
+The -\/-no\_stream switch only works with those biotools where it
+makes sense that the user might want to terminate the data stream,
+\emph{i.e}. after an analysis step where the user wants to output
+the result, but not the data stream.
+
+
+\subsection{How to write my results to file?\label{sub:How-to-write-result}}
+
+Saving the result of an analysis to file can be done implicitly or
+explicitly. The implicit way:
+
+\begin{lyxcode}
+<biotool>~-{}-no\_stream~>~<file>
+\end{lyxcode}
+If you use '>' to redirect 'stdout' to file then it is important to
+use the -\/-no\_stream switch to avoid writing a mix of biotools
+records and result to the same file causing havoc. The safe way is
+to use the -\/-result\_out switch which explicitly tells the biotool
+to write the result to a given file:
+
+\begin{lyxcode}
+<biotool>~-{}-result\_out=<file>
+\end{lyxcode}
+Using the above method will not terminate the stream, so it is possible
+to pipe that into another biotool generating different results:
+
+\begin{lyxcode}
+<biotool1>~-{}-result\_out=<file1>~|~<biotool2>~-{}-result\_out=<file2>
+\end{lyxcode}
+And still the data stream will continue unless terminated with -\/-no\_stream:
+
+\begin{lyxcode}
+<biotool>~-{}-result\_out=<file>~-{}-no\_stream
+\end{lyxcode}
+Or written to file implicitly or explicitly \eqref{sub:How-to-write-stream}.
+The explicit way:
+
+\begin{lyxcode}
+<biotool>~-{}-result\_out=<file1>~-{}-stream\_out=<file2>
+\end{lyxcode}
+
+\subsection{How to read data from multiple sources?}
+
+To read multiple data sources, with the same type or different type
+of data do:
+
+\begin{lyxcode}
+<biotool1>~-{}-data\_in=<file1>~|~<biotool2>~-{}-data\_in=<file2>
+\end{lyxcode}
+where type is the data type a specific biotool reads.
+
+
+\section{Reading input}
+
+
+\subsection{How to read biotools input?}
+
+See \eqref{sub:How-to-read-stream}.
+
+
+\subsection{How to read in data?}
+
+Data in different formats can be read with the appropriate biotool
+for that format. The biotools are typically named 'read\_<data type>'
+such as \textbf{read\_fasta}, \textbf{read\_bed}, \textbf{read\_tab},
+etc., and all behave in a similar manner. Data can be read by supplying
+the -\/-data\_in switch and a file name to the file containing the
+data:
+
+\begin{lyxcode}
+<biotool>~-{}-data\_in=<file>
+\end{lyxcode}
+It is also possible to read in a saved biotools stream (see \ref{sub:How-to-read-stream})
+as well as reading data in one go:
+
+\begin{lyxcode}
+<biotool>~-{}-stream\_in=<file1>~-{}-data\_in=<file2>
+\end{lyxcode}
+If you want to read data from several files you can do this:
+
+\begin{lyxcode}
+<biotool>~-{}-data\_in=<file1>~|~<biotool>~-{}-data\_in=<file2>
+\end{lyxcode}
+If you have several data files you can read in all explicitly with
+a comma separated list:
+
+\begin{lyxcode}
+<biotool>~-{}-data\_in=file1,file2,file3
+\end{lyxcode}
+And it is also possible to use file globbing:
+
+\begin{lyxcode}
+<biotool>~-{}-data\_in={*}.fna
+\end{lyxcode}
+Or in a combination:
+
+\begin{lyxcode}
+<biotool>~-{}-data\_in=file1,/dir/{*}.fna
+\end{lyxcode}
+Finally, it is possible to read in data in different formats using
+the appropriate biotool for each format:
+
+\begin{lyxcode}
+<biotool1>~-{}-data\_in=<file1>~|~<biotool2>~-{}-data\_in=<file2>~...
+\end{lyxcode}
+
+\subsection{How to read FASTA input?}
+
+Sequences in FASTA format can be read explicitly using \textbf{read\_fasta}:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>
+\end{lyxcode}
+
+\subsection{How to read alignment input?}
+
+If your alignment is FASTA formatted then you can use \textbf{read\_align}.
+It is also possible to use \textbf{read\_fasta} since the data is
+FASTA formatted, however, with \textbf{read\_fasta} the key ALIGN
+will be omitted. The ALIGN key is used to determine which sequences
+belong to what alignment which is required for \textbf{write\_align}.
+
+\begin{lyxcode}
+read\_align~-{}-data\_in=<file>
+\end{lyxcode}
+
+\subsection{How to read tabular input?\label{sub:How-to-read-table}}
+
+Tabular input can be read with \textbf{read\_tab} which will read
+in all rows and chosen columns (separated by a given delimiter) from
+a table in text format.
+
+The table below:
+
+\noindent \begin{center}
+\begin{tabular}{lll}
+Human & ATACGTCAG & 23524\tabularnewline
+Dog & AGCATGAC & 2442\tabularnewline
+Mouse & GACTG & 234\tabularnewline
+Cat & AAATGCA & 2342\tabularnewline
+\end{tabular}
+\par\end{center}
+
+Can be read using the command:
+
+\begin{lyxcode}
+read\_tab~-{}-data\_in=<file>
+\end{lyxcode}
+Which will result in four records, one for each row, where the keys
+V0, V1, V2 are the default keys for the organism, sequence, and count,
+respectively. It is possible to select a subset of columns to read
+by using the -\/-cols switch which takes a comma separated list of
+column numbers (first column is designated 0) as argument. So to
+read in only the sequence and the count so that the count comes before
+the sequence do:
+
+\begin{lyxcode}
+read\_tab~-{}-data\_in=<file>~-{}-cols=2,1
+\end{lyxcode}
+It is also possible to name the columns with the -\/-keys switch:
+
+\begin{lyxcode}
+read\_tab~-{}-data\_in=<file>~-{}-cols=2,1~-{}-keys=COUNT,SEQ
+\end{lyxcode}
+
+\subsection{How to read BED input?}
+
+The BED (Browser Extensible Data%
+\footnote{\url{http://genome.ucsc.edu/FAQ/FAQformat}%
+}) format is a tabular format for data pertaining to one of the Eukaryotic
+genomes in the UCSC genome browser%
+\footnote{\url{http://genome.ucsc.edu/}%
+}. The BED format consists of up to 12 columns, where the first three
+are mandatory CHR, CHR\_BEG, and CHR\_END. The mandatory columns and
+any of the optional columns can all be read in easily with the \textbf{read\_bed}
+biotool.
+
+\begin{lyxcode}
+read\_bed~-{}-data\_in=<file>
+\end{lyxcode}
+It is also possible to read the BED file with \textbf{read\_tab} (see~\ref{sub:How-to-read-table}),
+however, that will be more cumbersome because you need to specify
+the keys:
+
+\begin{lyxcode}
+read\_tab~-{}-data\_in=<file>~-{}-keys=CHR,CHR\_BEG,CHR\_END~...
+\end{lyxcode}
+
+\subsection{How to read PSL input?}
+
+The PSL format is the output from BLAT and contains 21 mandatory fields
+that can be read with \textbf{read\_psl}:
+
+\begin{lyxcode}
+read\_psl~-{}-data\_in=<file>
+\end{lyxcode}
+
+\section{Writing output}
+
+All result output can be written explicitly to file using the -\/-result\_out
+switch which all result generating biotools have. It is also possible
+to write the result to file implicitly by directing 'stdout' to file
+using '>', however, that requires the -\/-no\_stream switch to prevent
+a mixture of data stream and results in the file. The explicit (and
+safe) way:
+
+\begin{lyxcode}
+...~|~<biotool>~-{}-result\_out=<file>
+\end{lyxcode}
+The implicit way:
+
+\begin{lyxcode}
+...~|~<biotool>~-{}-no\_stream~>~<file>
+\end{lyxcode}
+
+\subsection{How to write biotools output?}
+
+See \eqref{sub:How-to-write-stream}.
+
+
+\subsection{How to write FASTA output?\label{sub:How-to-write-fasta}}
+
+FASTA output can be written with \textbf{write\_fasta}.
+
+\begin{lyxcode}
+...~|~write\_fasta~-{}-result\_out=<file>
+\end{lyxcode}
+It is also possible to wrap the sequences to a given width using the
+-\/-wrap switch although wrapping of sequence is generally an evil
+thing:
+
+\begin{lyxcode}
+...~|~write\_fasta~-{}-no\_stream~-{}-wrap=80
+\end{lyxcode}
+
+\subsection{How to write alignment output?\label{sub:How-to-write-alignment}}
+
+Pretty alignments with ruler%
+\footnote{'.' for every 10 residues, ':' for every 50, and '|' for every 100%
+} and consensus sequence can be created with \textbf{write\_align},
+which also has the optional -\/-wrap switch to break the alignment
+into blocks of a given width:
+
+\begin{lyxcode}
+...~|~write\_align~-{}-result\_out=<file>~-{}-wrap=80
+\end{lyxcode}
+If the number of sequences in the alignment is 2 then a pairwise alignment
+will be output otherwise a multiple alignment. And if the sequence
+type, determined automagically, is protein, then residues and symbols
+(+,~:,~.) will be used to show consensus according to the Blosum62
+matrix.
+
+
+\subsection{How to write tabular output?\label{sub:How-to-write-tab}}
+
+Outputting the data stream as a table can be done with \textbf{write\_tab},
+which will generate one row per record with the values as columns.
+If you supply the optional -\/-comment switch, then the first row
+in the table will be a 'comment' line prefixed with a '\#':
+
+\begin{lyxcode}
+...~|~write\_tab~-{}-result\_out=<file>~-{}-comment
+\end{lyxcode}
+You can also change the delimiter from the default (tab) to \emph{e.g.}
+',':
+
+\begin{lyxcode}
+...~|~write\_tab~-{}-result\_out=<file>~-{}-delimit=','
+\end{lyxcode}
+If you want the values output in a specific order you have to supply
+a comma separated list using the -\/-keys switch that will print
+only those keys in that order:
+
+\begin{lyxcode}
+...~|~write\_tab~-{}-result\_out=<file>~-{}-keys=SEQ\_NAME,COUNT
+\end{lyxcode}
+Alternatively, if you have some keys that you don't want in the tabular
+output, use the -\/-no\_keys switch. So to print all keys except
+SEQ and SEQ\_TYPE do:
+
+\begin{lyxcode}
+...~|~write\_tab~-{}-result\_out=<file>~-{}-no\_keys=SEQ,SEQ\_TYPE
+\end{lyxcode}
+Finally, if you have a stream containing a mix of different records
+types, \emph{e.g.} records with sequences and records with matches,
+then you can use \textbf{write\_tab} to output all the records in
+tabular format, however, the -\/-comment, -\/-keys, and -\/-no\_keys
+switches will only respond to records of the first type encountered.
+The reason is that outputting mixed records is probably not what you
+want anyway, and you should remove all the unwanted records from the
+stream before outputting the table: \textbf{grab} is your friend (see~\ref{sub:How-to-grab}).
+
+
+\subsection{How to write a BED output?\label{sub:How-to-write-BED}}
+
+Data in BED format can be output if the records contain the mandatory
+keys CHR, CHR\_BEG, and CHR\_END using \textbf{write\_bed}. If the
+optional keys are also present, they will be output as well:
+
+\begin{lyxcode}
+write\_bed~-{}-result\_out=<file>
+\end{lyxcode}
+
+\subsection{How to write PSL output?\label{sub:How-to-write-PSL}}
+
+Data in PSL format can be output using \textbf{write\_psl:}
+
+\begin{lyxcode}
+write\_psl~-{}-result\_out=<file>
+\end{lyxcode}
+
+\section{Manipulating Records}
+
+
+\subsection{How to select a few records?\label{sub:How-to-select-a-few-records}}
+
+To quickly get an overview of your data you can limit the data stream
+to show a few records. This is also very useful to test the pipeline
+with a few records if you are setting up a complex analysis using
+several biotools. That way you can inspect that all goes well before
+analyzing and waiting for the full data set. All of the read\_<type>
+biotools have the -\/-num switch which will take a number as argument
+and only that number of records will be read. So to read in the first
+10 FASTA entries from a file:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=test.fna~-{}-num=10
+\end{lyxcode}
+Another way of doing this is to use \textbf{head\_records}, which will limit
+the stream to show the first 10 records (default):
+
+\begin{lyxcode}
+...~|~head\_records
+\end{lyxcode}
+Using \textbf{head\_records} directly after one of the read\_<type>
+biotools will be a lot slower than using the -\/-num switch with
+the read\_<type> biotools, however, \textbf{head\_records} can also
+be used to limit the output from all the other biotools. It is also
+possible to give \textbf{head\_records} a number of records to show
+using the -\/-num switch. So to display the first 100 records do:
+
+\begin{lyxcode}
+...~|~head\_records~-{}-num=100
+\end{lyxcode}
+
+\subsection{How to count all records in the data stream?}
+
+To count all the records in the data stream use \textbf{count\_records},
+which adds one record (which is not included in the count) to the
+data stream. So to count the number of sequences in a FASTA file you
+can do this:
+
+\begin{lyxcode}
+cat~test.fna~|~read\_fasta~|~count\_records~-{}-no\_stream
+\end{lyxcode}
+Which will write the last record containing the count to 'stdout':
+
+\begin{lyxcode}
+-{}-{}-
+
+count\_records:~630
+\end{lyxcode}
+It is also possible to write the count to file using the -\/-result\_out
+switch.
+
+
+\subsection{How to grab specific records?\label{sub:How-to-grab}}
+
+The biotool \textbf{grab} is related to the Unix grep and locates
+records based on matching keys and/or values using either a pattern,
+a Perl regex, or a numerical evaluation. To easily \textbf{grab} all
+records in the stream that have any mention of the pattern 'human'
+just pipe the data stream through \textbf{grab} like this:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern=human
+\end{lyxcode}
+This will search for the pattern 'human' in all keys and all values.
+The -\/-pattern switch takes a comma separated list of patterns,
+so in order to match multiple patterns do:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern=human,mouse
+\end{lyxcode}
+It is also possible to use the -\/-pattern\_in switch instead of
+-\/-pattern. -\/-pattern\_in is used to read a file with one pattern
+per line:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern\_in=patterns.txt
+\end{lyxcode}
+If you want the opposite result --- to find all records that do
+not match the patterns, add the -\/-invert switch, which not only
+works with the -\/-pattern switch, but also with -\/-regex and -\/-eval:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern=human~-{}-invert
+\end{lyxcode}
+If you want to search the record keys only, \emph{e.g.} to find all
+records containing the key SEQ you can add the -\/-keys\_only switch.
+This will prevent matching of SEQ in any record value --- since
+SEQ is a not uncommon peptide sequence you could otherwise get an unwanted record.
+Also, this will give an increase in speed since only the keys are
+searched:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern=SEQ~-{}-keys\_only
+\end{lyxcode}
+However, if you are interested in finding the peptide sequence SEQ
+and not the SEQ key, just add the -\/-vals\_only switch instead:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern=SEQ~-{}-vals\_only
+\end{lyxcode}
+Also, if you want to grab for certain key/value pairs you can supply
+a comma separated list of keys whose values will then be searched using
+the -\/-keys switch. This is handy if your records contain large
+genomic sequences and you don't want to search the entire sequence
+for \emph{e.g.} the organism name --- it is much faster to tell \textbf{grab}
+which keys to search the value for:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern=human~-{}-keys=SEQ\_NAME
+
+
+\end{lyxcode}
+It is also possible to invoke flexible matching using regex (regular
+expressions) instead of simple pattern matching. In \textbf{grab}
+the regex engine is Perl based and allows use of different type of
+wild cards, alternatives, \emph{etc}%
+\footnote{\url{http://perldoc.perl.org/perlreref.html}%
+}. If you want to \textbf{grab} records with the sequence ATCG or
+GCTA you can do this:
+
+\begin{lyxcode}
+...~|~grab~-{}-regex='ATCG|GCTA'
+\end{lyxcode}
+Or if you want to find sequences beginning with ATCG:
+
+\begin{lyxcode}
+...~|~grab~-{}-regex='\textasciicircum{}ATCG'
+\end{lyxcode}
+You can also use \textbf{grab} to locate records that fulfill a numerical
+property using the -\/-eval switch which takes an expression in three
+parts. The first part is the key that holds the number we want to
+evaluate, the second part holds one of the six operators:
+
+\begin{enumerate}
+\item Greater than: >
+\item Greater than or equal to: >=
+\item Less than: <
+\item Less than or equal to: <=
+\item Equal to: =
+\item Not equal to: !=
+\end{enumerate}
+And finally comes the number used in the evaluation. So to \textbf{grab}
+all records with a sequence length greater than 30:
+
+\begin{lyxcode}
+...~|~length\_seq~|~grab~-{}-eval='SEQ\_LEN~>~30'
+\end{lyxcode}
+If you want to locate all records containing the pattern 'human' and
+where the sequence length is greater than 30, you do this by running
+the stream through \textbf{grab} twice:
+
+\begin{lyxcode}
+...~|~grab~-{}-pattern='human'~|~length\_seq~|~grab~-{}-eval='SEQ\_LEN~>~30'
+\end{lyxcode}
+To get the best speed performance, use the most restrictive \textbf{grab}
+first.
+
+
+\subsection{How to remove keys from records?}
+
+To remove one or more specific keys from all records in the data stream
+use \textbf{remove\_keys} like this:
+
+\begin{lyxcode}
+...~|~remove\_keys~-{}-keys=SEQ,SEQ\_NAME
+\end{lyxcode}
+In the above example SEQ and SEQ\_NAME will be removed from all records
+if they exist in these. If all keys are removed from a record, then
+the record will be removed.
+
+
+\subsection{How to rename keys in records?}
+
+Sometimes you want to rename a record key, \emph{e.g.} if you have
+read in a two column table with sequence name and sequence in each
+column (see \ref{sub:How-to-read-table}) without specifying the key
+names, then the sequence name will be called V0 and the sequence V1
+as default in the \textbf{read\_tab} biotool. To rename the V0 and
+V1 keys we need to run the stream through \textbf{rename\_keys} twice
+(one for each key to rename):
+
+\begin{lyxcode}
+...~|~rename\_keys~-{}-keys=V0,SEQ\_NAME~|~rename\_keys~-{}-keys=V1,SEQ
+\end{lyxcode}
+The first instance of \textbf{rename\_keys} replaces all the V0 keys
+with SEQ\_NAME, and the second instance of \textbf{rename\_keys} replaces
+all the V1 keys with SEQ. \emph{Et voil\`a} the data can now be used
+in the biotools that require these keys.
+
+
+\section{Manipulating Sequences}
+
+
+\subsection{How to get sequence lengths?}
+
+The length for sequences in records can be determined with \textbf{length\_seq},
+which adds the key SEQ\_LEN to each record with the sequence length
+as the value. It also generates an extra record that is emitted last
+with the key TOTAL\_SEQ\_LEN showing the total length of all the sequences.
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~length\_seq
+\end{lyxcode}
+It is also possible to determine the sequence length using the generic
+tool \textbf{length\_vals} (see \#\#\#), which determines the length
+of the values for a given list of keys:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~length\_vals~-{}-keys=SEQ
+\end{lyxcode}
+To obtain the total length of all sequences use \textbf{sum\_vals}
+like this:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~length\_vals~-{}-keys=SEQ
+
+|~sum\_vals~-{}-keys=SEQ\_LEN
+\end{lyxcode}
+The biotool \textbf{analyze\_seq} will also determine the length of
+each sequence (see~\ref{sub:How-to-analyze}).
+
+
+\subsection{How to analyze sequence composition?\label{sub:How-to-analyze}}
+
+If you want to find out the sequence type, composition, length, as
+well as GC content, indel content and proportions of soft and hard
+masked sequence, then use \textbf{analyze\_seq}. This handy biotool
+will determine all these things per sequence from which it is easy
+to get an overview using the \textbf{write\_tab} biotool to output
+a table (see~\ref{sub:How-to-write-tab}). So in order to determine
+the sequence composition of a FASTA file with just one entry containing
+the sequence 'ATCG' we just read the data with \textbf{read\_fasta}
+and run the output through \textbf{analyze\_seq} which will add the
+analysis to the record like this:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=test.fna~|~analyze\_seq~...
+
+
+
+-{}-{}-
+
+GC\%:~50.00
+
+HARD\_MASK\%:~0.00
+
+RES:-:~0
+
+RES:.:~0
+
+RES:A:~1
+
+RES:B:~0
+
+RES:C:~1
+
+RES:D:~0
+
+RES:G:~1
+
+RES:H:~0
+
+RES:K:~0
+
+RES:M:~0
+
+RES:N:~0
+
+RES:R:~0
+
+RES:S:~0
+
+RES:T:~1
+
+RES:U:~0
+
+RES:V:~0
+
+RES:W:~0
+
+RES:Y:~0
+
+RES:\textasciitilde{}:~0
+
+SEQ:~ATCG
+
+SEQ\_LEN:~4
+
+SEQ\_NAME:~test
+
+SEQ\_TYPE:~DNA
+
+SOFT\_MASK\%:~0.00
+\end{lyxcode}
+Now to make a table of how many As, Ts, Cs, and Gs you can add the
+following:
+
+\begin{lyxcode}
+...~|~analyze\_seq~|~write\_tab~-{}-keys=RES:A,RES:T,RES:C,RES:G
+\end{lyxcode}
+Or if you want to see the proportions of hard and soft masked sequence:
+
+\begin{lyxcode}
+...~|~analyze\_seq~|~write\_tab~-{}-keys=HARD\_MASK\%,SOFT\_MASK\%
+\end{lyxcode}
+If you have a stack of sequences in one file and you want to determine
+the mean GC content you can do it using the \textbf{mean\_vals} biotool:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=test.fna~|~analyze\_seq~|~mean\_vals~-{}-keys=GC\%
+\end{lyxcode}
+Or if you want the total count of Ns you can use \textbf{sum\_vals}
+like this:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=test.fna~|~analyze\_seq~|~sum\_vals~-{}-keys=RES:N
+\end{lyxcode}
+
+\subsection{How to extract subsequences?\label{sub:How-to-extract}}
+
+In order to extract a subsequence from a longer sequence use the biotool
+extract\_seq, which will replace the sequence in the record with the
+subsequence (this behaviour should probably be modified to be dependent
+on a -\/-replace or a -\/-no\_replace switch). So to extract the
+first 20 residues from all sequences do (first residue is designated
+1):
+
+\begin{lyxcode}
+...~|~extract\_seq~-{}-beg=1~-{}-len=20
+\end{lyxcode}
+You can also specify a begin and end coordinate set:
+
+\begin{lyxcode}
+...~|~extract\_seq~-{}-beg=20~-{}-end=40
+\end{lyxcode}
+If you want the subsequences from position 20 to the sequence end
+do:
+
+\begin{lyxcode}
+...~|~extract\_seq~-{}-beg=20
+\end{lyxcode}
+If you want to extract subsequences a given distance from the sequence
+end you can do this by reversing the sequence with the biotool \textbf{reverse\_seq}
+\eqref{sub:How-to-reverse-seq}, followed by \textbf{extract\_seq}
+to get the subsequence, and then \textbf{reverse\_seq} again to get
+the subsequence back in the original orientation:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=test.fna~|~reverse\_seq
+
+|~extract\_seq~-{}-beg=10~-{}-len=10~|~reverse\_seq
+\end{lyxcode}
+
+\subsection{How to get genomic sequence?\label{sub:How-to-get-genomic-sequence}}
+
+The biotool \textbf{get\_genome\_seq} can extract subsequences for
+a given genome specified with the -\/-genome switch explicitly using
+the -\/-beg and -\/-end/-\/-len switches:
+
+\begin{lyxcode}
+get\_genome\_seq~-{}-genome=<genome>~-{}-beg=1~-{}-len=100
+\end{lyxcode}
+Alternatively, \textbf{get\_genome\_seq} can be used to append the
+corresponding sequence to BED, PSL, and BLAST records:
+
+\begin{lyxcode}
+read\_bed~-{}-data\_in=<BED~file>~|~get\_genome\_seq~-{}-genome=<genome>
+\end{lyxcode}
+
+\subsection{How to upper-case sequences?}
+
+Sequences can be shifted from lower case to upper case using \textbf{uppercase\_seq}:
+
+\begin{lyxcode}
+...~|~uppercase\_seq
+\end{lyxcode}
+
+\subsection{How to reverse sequences?\label{sub:How-to-reverse-seq}}
+
+The order of residues in a sequence can be reversed using reverse\_seq:
+
+\begin{lyxcode}
+...~|~reverse\_seq
+\end{lyxcode}
+Note that in order to reverse/complement a sequence you also need
+the \textbf{complement\_seq} biotool (see~\ref{sub:How-to-complement}).
+
+
+\subsection{How to complement sequences?\label{sub:How-to-complement}}
+
+DNA and RNA sequences can be complemented with \textbf{complement\_seq},
+which automagically determines the sequence type:
+
+\begin{lyxcode}
+...~|~complement\_seq
+\end{lyxcode}
+Note that in order to reverse/complement a sequence you also need
+the \textbf{reverse\_seq} biotool (see~\ref{sub:How-to-reverse-seq}).
+
+
+\subsection{How to remove indels from sequences?}
+
+Indels can be removed from sequences with the \textbf{remove\_indels}
+biotool. This is useful if you have aligned some sequences (see~\ref{sub:How-to-align})
+and extracted (see~\ref{sub:How-to-extract}) a block of subsequences
+from the alignment and you want to use these sequences in a search
+where you need to remove the indels first. '-', '\textasciitilde{}',
+and '.' are considered indels:
+
+\begin{lyxcode}
+...~|~remove\_indels
+\end{lyxcode}
+
+\subsection{How to split sequences into overlapping subsequences?}
+
+Sequences can be split into overlapping subsequences with the \textbf{split\_seq}
+biotool.
+
+\begin{lyxcode}
+...~|~split\_seq~-{}-word\_size=20~-{}-uniq
+\end{lyxcode}
+
+\subsection{How to determine the oligo frequency?}
+
+In order to determine if any oligo usage is over represented in one
+or more sequences you can determine the frequency of oligos of a given
+size with \textbf{oligo\_freq}:
+
+\begin{lyxcode}
+...~|~oligo\_freq~-{}-word\_size=4
+\end{lyxcode}
+And if you have more than one sequence and want to accumulate the
+frequencies you need the -\/-all switch:
+
+\begin{lyxcode}
+...~|~oligo\_freq~-{}-word\_size=4~-{}-all
+\end{lyxcode}
+To get a meaningful result you need to write the resulting frequencies
+as a table with \textbf{write\_tab} (see~\ref{sub:How-to-write-tab}),
+but first it is important to \textbf{grab} (see~\ref{sub:How-to-grab})
+the records with the frequencies to avoid full length sequences in
+the table:
+
+\begin{lyxcode}
+...~|~oligo\_freq~-{}-word\_size=4~-{}-all~|~grab~-{}-pattern=OLIGO~-{}-keys\_only
+
+|~write\_tab~-{}-no\_stream
+\end{lyxcode}
+And the resulting frequency table can be sorted with Unix sort (man
+sort).
+
+
+\subsection{How to search for sequences in genomes?}
+
+See the following biotools:
+
+\begin{itemize}
+\item \textbf{patscan\_seq} \eqref{sub:How-to-use-patscan}
+\item \textbf{blat\_seq} \eqref{sub:How-to-use-BLAT}
+\item \textbf{blast\_seq} \eqref{sub:How-to-use-BLAST}
+\item \textbf{vmatch\_seq} \eqref{sub:How-to-use-Vmatch}
+\end{itemize}
+
+\subsection{How to search sequences for a pattern?\label{sub:How-to-use-patscan}}
+
+It is possible to search sequences in the data stream for patterns
+using the \textbf{patscan\_seq} biotool which utilizes the powerful
+scan\_for\_matches engine. Consult the documentation for scan\_for\_matches
+in order to learn how to define patterns (the documentation is included
+in Appendix~\ref{sec:scan_for_matches-README}).
+
+To search all sequences for a simple pattern consisting of the sequence
+ATCGATCG allowing for 3 mismatches, 2 insertions and 1 deletion:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~patscan\_seq~-{}-pattern='ATCGATCG{[}3,2,1]'
+\end{lyxcode}
+The -\/-pattern switch takes a comma separated list of patterns,
+so if you want to search for more than one pattern do:
+
+\begin{lyxcode}
+...~|~patscan\_seq~-{}-pattern='ATCGATCG{[}3,2,1],GCTAGCTA{[}3,2,1]'
+\end{lyxcode}
+It is also possible to have a list of different patterns to search
+for in a file with one pattern per line. In order to get \textbf{patscan\_seq}
+to read these patterns use the -\/-pattern\_in switch:
+
+\begin{lyxcode}
+...~|~patscan\_seq~-{}-pattern\_in=<file>
+\end{lyxcode}
+To also scan the complementary strand in nucleotide sequences (\textbf{patscan\_seq}
+automagically determines the sequence type) you need to add the -\/-comp
+switch:
+
+\begin{lyxcode}
+...~|~patscan\_seq~-{}-pattern=<pattern>~-{}-comp
+\end{lyxcode}
+It is also possible to use \textbf{patscan\_seq} to output those records
+that do not contain a certain pattern by using the -\/-invert switch:
+
+\begin{lyxcode}
+...~|~patscan\_seq~-{}-pattern=<pattern>~-{}-invert
+\end{lyxcode}
+Finally, \textbf{patscan\_seq} can also scan for patterns in a given
+genome sequence, instead of sequences in the stream, using the -\/-genome
+switch:
+
+\begin{lyxcode}
+patscan\_seq~-{}-pattern=<pattern>~-{}-genome=<genome>
+\end{lyxcode}
+
+\subsection{How to use BLAT for sequence search?\label{sub:How-to-use-BLAT}}
+
+Sequences in the data stream can be matched against supported genomes
+using \textbf{blat\_seq} which is a biotool using BLAT as the name
+might suggest. Currently only Mouse and Human genomes are available
+and it is not possible to use OOC files since there is still a need
+for a local repository for genome files. Otherwise it is just:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~blat\_seq~-{}-genome=<genome>
+\end{lyxcode}
+The search results can then be written to file with \textbf{write\_psl}
+(see~\ref{sub:How-to-write-PSL}) or \textbf{write\_bed} (see~\ref{sub:How-to-write-BED},
+although with \textbf{write\_bed} some information will be lost).
+It is also possible to plot chromosome distribution of the search
+results using \textbf{plot\_chrdist} (see~\ref{sub:How-to-plot-chrdist})
+or the distribution of the match lengths using \textbf{plot\_lendist}
+(see~\ref{sub:How-to-plot-lendist}) or a karyogram with the hits
+using \textbf{plot\_karyogram} (see~\ref{sub:How-to-plot-karyogram}).
+
+
+\subsection{How to use BLAST for sequence search?\label{sub:How-to-use-BLAST}}
+
+Two biotools exist for blasting sequences: \textbf{create\_blast\_db}
+is used to create the BLAST database required for BLAST which is queried
+using the biotool \textbf{blast\_seq}. So in order to create a BLAST
+database from sequences in the data stream you simply run:
+
+\begin{lyxcode}
+...~|~create\_blast\_db~-{}-database=my\_database~-{}-no\_stream
+\end{lyxcode}
+The type of sequence to use for the database is automagically determined
+by \textbf{create\_blast\_db}, but don't have a mixture of peptide
+and nucleic acids sequences in the stream. The -\/-database switch
+takes a path as argument, but will default to 'blastdb\_<time\_stamp>'
+if not set.
+
+The resulting database can now be queried with sequences in another
+data stream using \textbf{blast\_seq}:
+
+\begin{lyxcode}
+...~|~blast\_seq~-{}-database=my\_database
+\end{lyxcode}
+Again, the sequence type is determined automagically and the appropriate
+BLAST program is guessed (see the table below); however, the program name
+can be overruled with the -\/-program switch.
+
+\noindent \begin{center}
+\begin{tabular}{ccc}
+Subject sequence & Query sequence & Program guess\tabularnewline
+\hline
+Nucleotide & Nucleotide & blastn\tabularnewline
+Protein & Protein & blastp\tabularnewline
+Protein & Nucleotide & blastx\tabularnewline
+Nucleotide & Protein & tblastn\tabularnewline
+\end{tabular}
+\par\end{center}
+
+Finally, it is also possible to use \textbf{blast\_seq} for blasting
+sequences against a preformatted genome using the -\/-genome switch
+instead of the -\/-database switch:
+
+\begin{lyxcode}
+...~|~blast\_seq~-{}-genome=<genome>
+\end{lyxcode}
+
+\subsection{How to use Vmatch for sequence search?\label{sub:How-to-use-Vmatch}}
+
+The powerful suffix array software package Vmatch%
+\footnote{\url{http://www.vmatch.de/}%
+} can be used for exact mapping of sequences against indexed genomes
+using the biotool \textbf{vmatch\_seq}, which will e.g. map 700000
+ESTs to the human genome locating all 160 million hits in less than an
+hour.
+
+\begin{lyxcode}
+...~|~vmatch\_seq~-{}-genome=<genome>
+\end{lyxcode}
+Only nucleotide sequences and sequences longer than 11 nucleotides
+will be mapped. The resulting SCORE key will hold the number of genome
+matches of a given sequence (multi-mappers).
+
+
+\subsection{How to find all matches between sequences?\label{sub:How-to-find-matches}}
+
+All matches between two sequences can be determined with the biotool
+\textbf{match\_seq}. The match finding engine underneath the hood
+of \textbf{match\_seq} is the super fast suffix tree program MUMmer%
+\footnote{\url{http://mummer.sourceforge.net/}%
+}, which will locate all forward and reverse matches between huge sequences
+in a matter of minutes (if the repeat count is not too high and if
+the word size used is appropriate). Matching two \emph{Helicobacter
+pylori} genomes (1.7Mbp) takes around 10 seconds:
+
+\begin{lyxcode}
+...~|~match\_seq~-{}-word\_size=20~-{}-direction=both
+\end{lyxcode}
+The output from \textbf{match\_seq} can be used to generate a dot
+plot with \textbf{plot\_matches} (see~\ref{sub:How-to-generate-dotplot}).
+
+
+\subsection{How to align sequences?\label{sub:How-to-align}}
+
+Sequences in the stream can be aligned with the \textbf{align\_seq}
+biotool that uses Muscle%
+\footnote{\url{http://www.drive5.com/muscle/muscle.html}%
+} as alignment engine. Currently you cannot change any of the Muscle
+alignment parameters and \textbf{align\_seq} will create an alignment
+based on the defaults (which are really good!):
+
+\begin{lyxcode}
+...~|~align\_seq
+\end{lyxcode}
+The aligned output can be written to file in FASTA format using \textbf{write\_fasta}
+(see~\ref{sub:How-to-write-fasta}) or in pretty text using \textbf{write\_align}
+(see~\ref{sub:How-to-write-alignment}).
+
+
+\subsection{How to create a weight matrix?}
+
+If you want a weight matrix to show the sequence composition of a
+stack of sequences you can use the biotool create\_weight\_matrix:
+
+\begin{lyxcode}
+...~|~create\_weight\_matrix
+\end{lyxcode}
+The result can be output in percent using the -\/-percent switch:
+
+\begin{lyxcode}
+...~|~create\_weight\_matrix~-{}-percent
+\end{lyxcode}
+The weight matrix can be written as tabular output with \textbf{write\_tab}
+(see~\ref{sub:How-to-write-tab}) after removing the records containing
+SEQ with \textbf{grab} (see~\ref{sub:How-to-grab}):
+
+\begin{lyxcode}
+...~|~create\_weight\_matrix~|~grab~-{}-invert~-{}-keys=SEQ~-{}-keys\_only
+
+|~write\_tab~-{}-no\_stream
+\end{lyxcode}
+The V0 column will hold the residue, while the rest of the columns
+will hold the frequencies for each sequence position.
+
+
+\section{Plotting}
+
+There exist several biotools for plotting. Some of these are based
+on GNUplot%
+\footnote{\url{http://www.gnuplot.info/}%
+}, which is an extremely powerful platform to generate all sorts of
+plots and even though GNUplot has quite a steep learning curve, the
+biotools utilizing GNUplot are simple to use. GNUplot is able to output
+a lot of different formats (called terminals in GNUplot), but the
+biotools focus on three formats only:
+
+\begin{enumerate}
+\item The 'dumb' terminal is default to the GNUplot based biotools and will
+output a plot in crude ASCII text (Fig.~\ref{fig:Dumb-terminal}).
+This is quite nice for a quick and dirty plot to get an overview of
+your data.
+\item The 'post' or 'postscript' terminal outputs postscript code which is
+publication grade graphics that can be viewed with applications such
+as Ghostview, Photoshop, and Preview.
+\item The 'svg' terminal outputs scalable vector graphics (SVG) which is
+a vector based format. SVG is great because you can edit the resulting
+plot using Photoshop or Inkscape%
+\footnote{Inkscape is a really handy drawing program that is free and open source.
+Available at \url{http://www.inkscape.org}%
+} if you want to add additional labels, captions, arrows, and so on
+and then save the result in different formats, such as postscript
+without losing resolution.
+\end{enumerate}
+The biotools for plotting that are not based on GNUplot only output
+SVG (that may change in the future).
+
+%
+\begin{figure}
+\noindent \begin{centering}
+\includegraphics[width=12cm]{lendist_ascii}
+\par\end{centering}
+
+\caption{\label{fig:Dumb-terminal}Dumb terminal}
+
+
+\begin{quote}
+The output of a length distribution plot in the default 'dumb terminal'
+to the terminal window.
+\end{quote}
+
+\end{figure}
+
+
+
+\subsection{How to plot a histogram?\label{How-to-plot-histogram}}
+
+A generic histogram for a given value can be plotted with the biotool
+\textbf{plot\_histogram} (Fig.~\ref{fig:Histogram}):
+
+\begin{lyxcode}
+...~|~plot\_histogram~-{}-key=TISSUE~-{}-no\_stream
+\end{lyxcode}
+(Figure missing)
+
+\noindent \begin{flushleft}
+%
+\begin{figure}
+\noindent \begin{centering}
+\includegraphics[width=12cm]{histogram}
+\par\end{centering}
+
+\caption{\label{fig:Histogram}Histogram}
+
+\end{figure}
+
+\par\end{flushleft}
+
+
+\subsection{How to plot a length distribution?\label{sub:How-to-plot-lendist}}
+
+Plotting of length distributions, whether sequence lengths, pattern
+lengths, hit lengths, \emph{etc.}, is a really handy thing and can
+be done with the biotool \textbf{plot\_lendist}. If you have a
+file with FASTA entries and want to plot the length distribution you
+do it like this:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~length\_seq
+
+|~plot\_lendist~-{}-key=SEQ\_LEN~-{}-no\_stream
+\end{lyxcode}
+The result will be written to the default dumb terminal and will look
+like Fig.~\ref{fig:Dumb-terminal}.
+
+If you instead want the result in postscript format you can do:
+
+\begin{lyxcode}
+...~|~plot\_lendist~-{}-key=SEQ\_LEN~-{}-terminal=post~-{}-result\_out=file.ps
+\end{lyxcode}
+That will generate the plot and save it to file, but not interrupt
+the data stream which can then be used in further analysis. You can
+also save the plot implicitly using '>', however, it is then important
+to terminate the stream with the -\/-no\_stream switch:
+
+\begin{lyxcode}
+...~|~plot\_lendist~-{}-key=SEQ\_LEN~-{}-terminal=post~-{}-no\_stream~>~file.ps
+\end{lyxcode}
+The resulting plot can be seen in Fig.~\ref{fig:Length-distribution}.
+
+%
+\begin{figure}
+
+
+\noindent \begin{centering}
+\includegraphics[width=12cm]{lendist}
+\par\end{centering}
+
+\caption{\label{fig:Length-distribution}Length distribution}
+
+
+\begin{quote}
+Length distribution of 630 piRNA like RNAs.
+\end{quote}
+
+\end{figure}
+
+
+
+\subsection{How to plot a chromosome distribution?\label{sub:How-to-plot-chrdist}}
+
+If you have the result of a sequence search against a multi chromosome
+genome, it is very practical to be able to plot the distribution of
+search hits on the different chromosomes. This can be done with \textbf{plot\_chrdist}:
+
+\begin{lyxcode}
+read\_fasta~-{}-data\_in=<file>~|~blat\_seq~-{}-genome=<genome>~|~plot\_chrdist~-{}-no\_stream
+\end{lyxcode}
+The above example will result in a crude plot using the 'dumb' terminal,
+and if you want to mess around with the results from the BLAT search
+you probably want to save the result to file first (see~\ref{sub:How-to-write-PSL}).
+To plot the chromosome distribution from the saved search result you
+can do:
+
+\begin{lyxcode}
+read\_bed~-{}-data\_in=file.bed~|~plot\_chrdist~-{}-terminal=post~-{}-result\_out=plot.ps
+\end{lyxcode}
+That will result in the output shown in Fig.~\ref{fig:Chromosome-distribution}.
+
+%
+\begin{figure}
+
+
+\noindent \begin{centering}
+\includegraphics[angle=90,width=12cm]{chrdist}
+\par\end{centering}
+
+\caption{\label{fig:Chromosome-distribution}Chromosome distribution}
+
+\end{figure}
+
+
+
+\subsection{How to generate a dotplot?\label{sub:How-to-generate-dotplot}}
+
+A dotplot is a powerful way to get an overview of the size and location
+of sequence insertions, deletions, and duplications between two sequences.
+Generating a dotplot with biotools is a two step process where you
+initially find all matches between two sequences using the tool \textbf{match\_seq}
+(see~\ref{sub:How-to-find-matches}) and plot the resulting matches
+with \textbf{plot\_matches}. Matching and plotting two \emph{Helicobacter
+pylori} genomes (1.7Mbp) takes around 10 seconds:
+
+\begin{lyxcode}
+...~|~match\_seq~|~plot\_matches~-{}-terminal=post~-{}-result\_out=plot.ps
+\end{lyxcode}
+The resulting dotplot is in Fig.~\ref{fig:Dotplot}.
+
+%
+\begin{figure}
+\noindent \begin{centering}
+\includegraphics[width=12cm]{dotplot}
+\par\end{centering}
+
+\caption{\label{fig:Dotplot}Dotplot}
+
+
+\begin{quote}
+Forward matches are displayed in green while reverse matches are displayed
+in red.
+\end{quote}
+
+\end{figure}
+
+
+
+\subsection{How to plot a sequence logo?}
+
+Sequence logos can be generated with \textbf{plot\_seqlogo}. The sequence
+type is determined automagically and an entropy scale of 2 bits and
+4 bits is used for nucleotide and peptide sequences, respectively%
+\footnote{\url{http://www.ccrnp.ncifcrf.gov/~toms/paper/hawaii/latex/node5.html}%
+}.
+
+\begin{lyxcode}
+...~|~plot\_seqlogo~-{}-no\_stream~-{}-result\_out=seqlogo.svg
+\end{lyxcode}
+An example of a sequence logo can be seen in Fig.~\ref{fig:Sequence-logo}.
+
+%
+\begin{figure}
+\noindent \begin{centering}
+\includegraphics[width=12cm]{seqlogo}
+\par\end{centering}
+
+\caption{\label{fig:Sequence-logo}Sequence logo}
+
+\end{figure}
+
+
+
+\subsection{How to plot a karyogram?\label{sub:How-to-plot-karyogram}}
+
+To plot search hits on genomes use \textbf{plot\_karyogram}, which
+will output a nice karyogram in SVG graphics:
+
+\begin{lyxcode}
+...~|~plot\_karyogram~-{}-result\_out=karyogram.svg
+\end{lyxcode}
+The banding data is taken from the UCSC genome browser database and
+currently only Human and Mouse are supported. Fig.~\ref{fig:Karyogram}
+shows the distribution of piRNA like RNAs matched to the Human genome.
+
+%
+\begin{figure}
+\noindent \begin{centering}
+\includegraphics[width=12cm]{karyogram}
+\par\end{centering}
+
+\caption{\label{fig:Karyogram}Karyogram}
+
+
+\begin{quote}
+Hits from a search of piRNA like RNAs in the Human genome are displayed
+as short horizontal bars.
+\end{quote}
+
+\end{figure}
+
+
+
+\section{Uploading Results}
+
+
+\subsection{How do I display my results in the UCSC Genome Browser?}
+
+Results from the list of biotools below can be uploaded directly to
+a local mirror of the UCSC Genome Browser using the biotool \textbf{upload\_to\_ucsc}:
+
+\begin{itemize}
+\item patscan\_seq \eqref{sub:How-to-use-patscan}
+\item blat\_seq \eqref{sub:How-to-use-BLAT}
+\item blast\_seq \eqref{sub:How-to-use-BLAST}
+\item vmatch\_seq \eqref{sub:How-to-use-Vmatch}
+\end{itemize}
+The simplest way of uploading data requires two mandatory
+switches: -\/-database, which is the UCSC database name (such as
+hg18, mm9, etc.), and -\/-table, which should be the user's initials
+followed by an underscore and a short description of the data:
+
+\begin{lyxcode}
+...~|~upload\_to\_ucsc~-{}-database=hg18~-{}-table=mah\_snoRNAs
+\end{lyxcode}
+The \textbf{upload\_to\_ucsc} biotool modifies the user's \textasciitilde{}/ucsc/my\_tracks.ra
+file automagically (a backup is created with the name \textasciitilde{}/ucsc/my\_tracks.ra\textasciitilde{})
+with default values that can be overridden using the following switches:
+
+\begin{itemize}
+\item -\/-short\_label - Short label for track - Default=database->table
+\item -\/-long\_label - Long label for track - Default=database->table
+\item -\/-group - Track group name - Default=<user name as defined in env>
+\item -\/-priority - Track display priority - Default=1
+\item -\/-color - Track color - Default=147,73,42
+\item -\/-chunk\_size - Chunks for loading - Default=10000000
+\item -\/-visibility - Track visibility - Default=pack
+\end{itemize}
+Also, data in BED or PSL format can be uploaded with \textbf{upload\_to\_ucsc}
+as long as these refer to genomes and chromosomes existing in
+the UCSC Genome Browser:
+
+\begin{lyxcode}
+read\_bed~-{}-data\_in=<bed~file>~|~upload\_to\_ucsc~...
+
+
+
+read\_psl~-{}-data\_in=<psl~file>~|~upload\_to\_ucsc~...
+\end{lyxcode}
+
+\section{Troubleshooting}
+
+Shoot the messenger!
+
+\appendix
+
+\section{Keys\label{sec:Keys}}
+
+HIT
+
+HIT\_BEG
+
+HIT\_END
+
+HIT\_LEN
+
+HIT\_NAME
+
+PATTERN
+
+
+\section{Switches\label{sec:Switches}}
+
+-\/-stream\_in
+
+-\/-stream\_out
+
+-\/-no\_stream
+
+-\/-data\_in
+
+-\/-result\_out
+
+-\/-num
+
+
+\section{scan\_for\_matches README\label{sec:scan_for_matches-README}}
+
+\begin{lyxcode}
+~~~~~~~~~~~~~~~~~~~~~~~~~~scan\_for\_matches:
+
+~~~~A~Program~to~Scan~Nucleotide~or~Protein~Sequences~for~Matching~Patterns
+
+~~~~~~~~~~~~~~~~~~~~~~~~Ross~Overbeek
+
+~~~~~~~~~~~~~~~~~~~~~~~~MCS
+
+~~~~~~~~~~~~~~~~~~~~~~~~Argonne~National~Laboratory
+
+~~~~~~~~~~~~~~~~~~~~~~~~Argonne,~IL~60439
+
+~~~~~~~~~~~~~~~~~~~~~~~~USA
+
+Scan\_for\_matches~is~a~utility~that~we~have~written~to~search~for
+
+patterns~in~DNA~and~protein~sequences.~~I~wrote~most~of~the~code,
+
+although~David~Joerg~and~Morgan~Price~wrote~sections~of~an
+
+earlier~version.~~The~whole~notion~of~pattern~matching~has~a~rich
+
+history,~and~we~borrowed~liberally~from~many~sources.~~However,~it~is
+
+worth~noting~that~we~were~strongly~influenced~by~the~elegant~tools
+
+developed~and~distributed~by~David~Searls.~~My~intent~is~to~make~the
+
+existing~tool~available~to~anyone~in~the~research~community~that~might
+
+find~it~useful.~~I~will~continue~to~try~to~fix~bugs~and~make~suggested
+
+enhancements,~at~least~until~I~feel~that~a~superior~tool~exists.
+
+Hence,~I~would~appreciate~it~if~all~bug~reports~and~suggestions~are
+
+directed~to~me~at~Overbeek@mcs.anl.gov.~~
+
+I~will~try~to~log~all~bug~fixes~and~report~them~to~users~that~send~me
+
+their~email~addresses.~~I~do~not~require~that~you~give~me~your~name
+
+and~address.~~However,~if~you~do~give~it~to~me,~I~will~try~to~notify
+
+you~of~serious~problems~as~they~are~discovered.
+
+Getting~Started:
+
+~~~~The~distribution~should~contain~at~least~the~following~programs:
+
+~~~~~~~~~~~~~~~~README~~~~~~~~~~~~~~~~~~-~~~~~This~document
+
+~~~~~~~~~~~~~~~~ggpunit.c~~~~~~~~~~~~~~~-~~~~~One~of~the~two~source~files
+
+~~~~~~~~~~~~~~~~scan\_for\_matches.c~~~~~~-~~~~~The~second~source~file
+
+~~~~~~~~~~~~~~~~
+
+~~~~~~~~~~~~~~~~run\_tests~~~~~~~~~~~~~~~-~~~~~A~perl~script~to~test~things
+
+~~~~~~~~~~~~~~~~show\_hits~~~~~~~~~~~~~~~-~~~~~A~handy~perl~script
+
+~~~~~~~~~~~~~~~~test\_dna\_input~~~~~~~~~~-~~~~~Test~sequences~for~DNA
+
+~~~~~~~~~~~~~~~~test\_dna\_patterns~~~~~~~-~~~~~Test~patterns~for~DNA~scan
+
+~~~~~~~~~~~~~~~~test\_output~~~~~~~~~~~~~-~~~~~Desired~output~from~test
+
+~~~~~~~~~~~~~~~~test\_prot\_input~~~~~~~~~-~~~~~Test~protein~sequences
+
+~~~~~~~~~~~~~~~~test\_prot\_patterns~~~~~~-~~~~~Test~patterns~for~proteins
+
+~~~~~~~~~~~~~~~~testit~~~~~~~~~~~~~~~~~~-~~~~~a~perl~script~used~for~test
+
+~~~~Only~the~first~three~files~are~required.~~The~others~are~useful,
+
+~~~~but~only~if~you~have~Perl~installed~on~your~system.~~If~you~do
+
+~~~~have~Perl,~I~suggest~that~you~type
+
+~~~~~~~~
+
+~~~~~~~~~~~~~~~~which~perl
+
+~~~~to~find~out~where~it~is~installed.~~On~my~system,~I~get~the~following
+
+~~~~response:
+
+~~~~~~~~
+
+~~~~~~~~~~~~~~~~clone\%~which~perl
+
+~~~~~~~~~~~~~~~~/usr/local/bin/perl
+
+~~~~indicating~that~Perl~is~installed~in~/usr/local/bin.~~Anyway,~once
+
+~~~~you~know~where~it~is~installed,~edit~the~first~line~of~files~
+
+~~~~~~~~testit
+
+~~~~~~~~show\_hits
+
+~~~~replacing~/usr/local/bin/perl~with~the~appropriate~location.~~I
+
+~~~~will~assume~that~you~can~do~this,~although~it~is~not~critical~(it
+
+~~~~is~needed~only~to~test~the~installation~and~to~use~the~\char`\"{}show\_hits\char`\"{}
+
+~~~~utility).~~Perl~is~not~required~to~actually~install~and~run
+
+~~~~scan\_for\_matches.~
+
+~~~~If~you~do~not~have~Perl,~I~suggest~you~get~it~and~install~it~(it
+
+~~~~is~a~wonderful~utility).~~Information~about~Perl~and~how~to~get~it
+
+~~~~can~be~found~in~the~book~\char`\"{}Programming~Perl\char`\"{}~by~Larry~Wall~and
+
+~~~~Randall~L.~Schwartz,~published~by~O'Reilly~\&~Associates,~Inc.
+
+~~~~To~get~started,~you~will~need~to~compile~the~program.~~~I~do~this
+
+~~~~using~
+
+~~~~~~~~gcc~-O~-o~scan\_for\_matches~~ggpunit.c~scan\_for\_matches.c
+
+~~~~If~you~do~not~use~GNU~C,~use~
+
+~~~~~~~~cc~-O~-DCC~-o~scan\_for\_matches~~ggpunit.c~scan\_for\_matches.c
+
+~~~~which~works~on~my~Sun.~~
+
+~~~~Once~you~have~compiled~scan\_for\_matches,~you~can~verify~that~it
+
+~~~~works~with
+
+~~~~~~~~clone\%~run\_tests~tmp
+
+~~~~~~~~clone\%~diff~tmp~test\_output
+
+~~~~You~may~get~a~few~strange~lines~of~the~sort
+
+~~~~~~~~clone\%~run\_tests~tmp
+
+~~~~~~~~rm:~tmp:~No~such~file~or~directory
+
+~~~~~~~~clone\%~diff~tmp~test\_output
+
+~~~~These~should~cause~no~concern.~~However,~if~the~\char`\"{}diff\char`\"{}~shows~that
+
+~~~~tmp~and~test\_output~are~different,~contact~me~(you~have~a
+
+~~~~problem).~
+
+~~~~You~should~now~be~able~to~use~scan\_for\_matches~by~following~the
+
+~~~~instructions~given~below~(which~is~all~the~normal~user~should~have
+
+~~~~to~understand,~once~things~are~installed~properly).
+
+~==============================================================
+
+How~to~run~scan\_for\_matches:
+
+~~~~To~run~the~program,~you~need~to~create~two~files
+
+~~~~1.~~the~first~file~contains~the~pattern~you~wish~to~scan~for;~I'll
+
+~~~~~~~~call~this~file~pat\_file~in~what~follows~(but~any~name~is~ok)
+
+~~~~2.~~the~second~file~contains~a~set~of~sequences~to~scan.~~These
+
+~~~~~~~~should~be~in~\char`\"{}fasta~format\char`\"{}.~~Just~look~at~the~contents~of
+
+~~~~~~~~test\_dna\_input~to~see~examples~of~this~format.~~Basically,
+
+~~~~~~~~each~sequence~begins~with~a~line~of~the~form
+
+~~~~~~~~~~~>sequence\_id
+
+~~~~~~~~and~is~followed~by~one~or~more~lines~containing~the~sequence.
+
+~~~~Once~these~files~have~been~created,~you~just~use
+
+~~~~~~~~scan\_for\_matches~pat\_file~<~input\_file
+
+~~~~to~scan~all~of~the~input~sequences~for~the~given~pattern.~~As~an
+
+~~~~example,~suppose~that~pat\_file~contains~a~single~line~of~the~form
+
+~~~~~~~~~~~~~~~~p1=4...7~3...8~\textasciitilde{}p1
+
+~~~~Then,
+
+~~~~~~~~~~~~~~~~scan\_for\_matches~pat\_file~<~test\_dna\_input
+
+~~~~should~produce~two~\char`\"{}hits\char`\"{}.~~When~I~run~this~on~my~machine,~I~get
+
+~~~~~~~~clone\%~scan\_for\_matches~pat\_file~<~test\_dna\_input
+
+~~~~~~~~>tst1:{[}6,27]
+
+~~~~~~~~cguaacc~ggttaacc~gguuacg~
+
+~~~~~~~~>tst2:{[}6,27]
+
+~~~~~~~~CGUAACC~GGTTAACC~GGUUACG~
+
+~~~~~~~~clone\%~
+
+Simple~Patterns~Built~by~Matching~Ranges~and~Reverse~Complements
+
+~~~~Let~me~first~explain~this~simple~pattern:
+
+~~~~~~~~~~~~~~~~
+
+~~~~~~~~~~~~~~~~p1=4...7~3...8~\textasciitilde{}p1
+
+~~~~The~pattern~consists~of~three~\char`\"{}pattern~units\char`\"{}~separated~by~spaces.
+
+~~~~The~first~pattern~unit~is
+
+~~~~~~~~~~~~~~~~p1=4...7
+
+~~~~which~means~\char`\"{}match~4~to~7~characters~and~call~them~p1\char`\"{}.~~The
+
+~~~~second~pattern~unit~is
+
+~~~~~~~~~~~~~~~~3...8
+
+~~~~which~means~\char`\"{}then~match~3~to~8~characters\char`\"{}.~~The~last~pattern~unit
+
+~~~~is~
+
+~~~~~~~~~~~~~~~~\textasciitilde{}p1
+
+~~~~which~means~\char`\"{}match~the~reverse~complement~of~p1\char`\"{}.~~The~first
+
+~~~~reported~hit~is~shown~as
+
+~~~~~~~~>tst1:{[}6,27]
+
+~~~~~~~~cguaacc~ggttaacc~gguuacg~
+
+~~~~which~states~that~characters~6~through~27~of~sequence~tst1~were
+
+~~~~matched.~~\char`\"{}cguaacc\char`\"{}~matched~the~first~pattern~unit,~\char`\"{}ggttaacc\char`\"{}~the
+
+~~~~second,~and~\char`\"{}gguuacg\char`\"{}~the~third.~~This~is~an~example~of~a~common
+
+~~~~type~of~pattern~used~to~search~for~sections~of~DNA~or~RNA~that
+
+~~~~would~fold~into~a~hairpin~loop.
+
+Searching~Both~Strands
+
+~~~~Now~for~a~short~aside:~scan\_for\_matches~only~searched~the
+
+~~~~sequences~in~the~input~file;~it~did~not~search~the~opposite
+
+~~~~strand.~~With~a~pattern~of~the~sort~we~just~used,~there~is~no
+
+~~~~need~to~search~the~opposite~strand.~~However,~it~is~normally~the
+
+~~~~case~that~you~will~wish~to~search~both~the~sequence~and~the
+
+~~~~opposite~strand~(i.e.,~the~reverse~complement~of~the~sequence).
+
+~~~~To~do~that,~you~would~just~use~the~\char`\"{}-c\char`\"{}~command~line~option.~~For~example,
+
+~~~~~~~~scan\_for\_matches~-c~pat\_file~<~test\_dna\_input
+
+~~~~Hits~on~the~opposite~strand~will~show~a~beginning~location~greater
+
+~~~~than~the~end~location~of~the~match.
+
+Defining~Pairing~Rules~and~Allowing~Mismatches,~Insertions,~and~Deletions
+
+~~~~Let~us~stop~now~and~ask~\char`\"{}What~additional~features~would~one~need~to
+
+~~~~really~find~the~kinds~of~loop~structures~that~characterize~tRNAs,
+
+~~~~rRNAs,~and~so~forth?\char`\"{}~~I~can~immediately~think~of~two:
+
+~~~~~~~~a)~you~will~need~to~be~able~to~allow~non-standard~pairings
+
+~~~~~~~~~~~(those~other~than~G-C~and~A-U),~and
+
+~~~~~~~~b)~you~will~need~to~be~able~to~tolerate~some~number~of
+
+~~~~~~~~~~~mismatches~and~bulges.
+
+~~~~~~~~
+
+~~~~Let~me~first~show~you~how~to~handle~non-standard~\char`\"{}rules~for
+
+~~~~pairing~in~reverse~complements\char`\"{}.~~Consider~the~following~pattern,
+
+~~~~which~I~show~as~two~lines~(you~may~use~as~many~lines~as~you~like~in
+
+~~~~forming~a~pattern,~although~you~can~only~break~a~pattern~at~points
+
+~~~~where~space~would~be~legal):
+
+~~~~~~~~~~~~r1=\{au,ua,gc,cg,gu,ug,ga,ag\}~
+
+~~~~~~~~~~~~p1=2...3~0...4~p2=2...5~1...5~r1\textasciitilde{}p2~0...4~\textasciitilde{}p1~~~~~~~~
+
+~~~~The~first~\char`\"{}pattern~unit\char`\"{}~does~not~actually~match~anything;~rather,
+
+~~~~it~defines~a~\char`\"{}pairing~rule\char`\"{}~in~which~standard~pairings~are
+
+~~~~allowed,~as~well~as~G-A~and~A-G~(in~case~you~wondered,~Us~and~Ts
+
+~~~~and~upper~and~lower~case~can~be~used~interchangeably;~for~example
+
+~~~~r1=\{AT,UA,gc,cg\}~could~be~used~to~define~the~\char`\"{}standard~rule\char`\"{}~for
+
+~~~~pairings).~~The~second~line~consists~of~seven~pattern~units~which
+
+~~~~may~be~interpreted~as~follows:
+
+~~~~~~~~~~~~p1=2...3~~~~~match~2~or~3~characters~(call~it~p1)
+
+~~~~~~~~~~~~0...4~~~~~~~~match~0~to~4~characters
+
+~~~~~~~~~~~~p2=2...5~~~~~match~2~to~5~characters~(call~it~p2)
+
+~~~~~~~~~~~~1...5~~~~~~~~match~1~to~5~characters
+
+~~~~~~~~~~~~r1\textasciitilde{}p2~~~~~~~~match~the~reverse~complement~of~p2,
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~allowing~G-A~and~A-G~pairs
+
+~~~~~~~~~~~~0...4~~~~~~~~match~0~to~4~characters~~~~~~~~
+
+~~~~~~~~~~~~\textasciitilde{}p1~~~~~~~~~~match~the~reverse~complement~of~p1
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~allowing~only~G-C,~C-G,~A-T,~and~T-A~pairs
+
+~~~~Thus,~r1\textasciitilde{}p2~means~\char`\"{}match~the~reverse~complement~of~p2~using~rule~r1\char`\"{}.
+
+~~~~Now~let~us~consider~the~issue~of~tolerating~mismatches~and~bulges.
+
+~~~~You~may~add~a~\char`\"{}qualifier\char`\"{}~to~the~pattern~unit~that~gives~the
+
+~~~~tolerable~number~of~\char`\"{}mismatches,~deletions,~and~insertions\char`\"{}.
+
+~~~~Thus,
+
+~~~~~~~~~~~~~~~~p1=10...10~3...8~\textasciitilde{}p1{[}1,2,1]
+
+~~~~means~that~the~third~pattern~unit~must~match~10~characters,
+
+~~~~allowing~one~\char`\"{}mismatch\char`\"{}~(a~pairing~other~than~G-C,~C-G,~A-T,~or
+
+~~~~T-A),~two~deletions~(a~deletion~is~a~character~that~occurs~in~p1,
+
+~~~~but~has~been~\char`\"{}deleted\char`\"{}~from~the~string~matched~by~\textasciitilde{}p1),~and~one
+
+~~~~insertion~(an~\char`\"{}insertion\char`\"{}~is~a~character~that~occurs~in~the~string
+
+~~~~matched~by~\textasciitilde{}p1,~but~for~which~no~corresponding~character
+
+~~~~occurs~in~p1).~~In~this~case,~the~pattern~would~match
+
+~~~~~~~~~~~~~~ACGTACGTAC~GGGGGGGG~GCGTTACCT
+
+~~~~which~is,~you~must~admit,~a~fairly~weak~loop.~~It~is~common~to
+
+~~~~allow~mismatches,~but~you~will~find~yourself~using~insertions~and
+
+~~~~deletions~much~more~rarely.~~In~any~event,~you~should~note~that
+
+~~~~allowing~mismatches,~insertions,~and~deletions~does~force~the
+
+~~~~program~to~try~many~additional~possible~pairings,~so~it~does~slow
+
+~~~~things~down~a~bit.
+
+How~Patterns~Are~Matched
+
+~~~~Now~is~as~good~a~time~as~any~to~discuss~the~basic~flow~of~control
+
+~~~~when~matching~patterns.~~Recall~that~a~\char`\"{}pattern\char`\"{}~is~a~sequence~of
+
+~~~~\char`\"{}pattern~units\char`\"{}.~~Suppose~that~the~pattern~units~were
+
+~~~~~~~~u1~u2~u3~u4~...~un
+
+~~~~The~scan~of~a~sequence~S~begins~by~setting~the~current~position
+
+~~~~to~1.~~Then,~an~attempt~is~made~to~match~u1~starting~at~the
+
+~~~~current~position.~~Each~attempt~to~match~a~pattern~unit~can
+
+~~~~succeed~or~fail.~~If~it~succeeds,~then~an~attempt~is~made~to~match
+
+~~~~the~next~unit.~~If~it~fails,~then~an~attempt~is~made~to~find~an
+
+~~~~alternative~match~for~the~immediately~preceding~pattern~unit.~~If
+
+~~~~this~succeeds,~then~we~proceed~forward~again~to~the~next~unit.~~If
+
+~~~~it~fails~we~go~back~to~the~preceding~unit.~~This~process~is~called
+
+~~~~\char`\"{}backtracking\char`\"{}.~~If~there~are~no~previous~units,~then~the~current
+
+~~~~position~is~incremented~by~one,~and~everything~starts~again.~~This
+
+~~~~proceeds~until~either~the~current~position~goes~past~the~end~of
+
+~~~~the~sequence~or~all~of~the~pattern~units~succeed.~~On~success,
+
+~~~~scan\_for\_matches~reports~the~\char`\"{}hit\char`\"{},~the~current~position~is~set
+
+~~~~just~past~the~hit,~and~an~attempt~is~made~to~find~another~hit.
+
+~~~~If~you~wish~to~limit~the~scan~to~simply~finding~a~maximum~of,~say,
+
+~~~~10~hits,~you~can~use~the~-n~option~(-n~10~would~set~the~limit~to
+
+~~~~10~reported~hits).~~For~example,
+
+~~~~~~~~scan\_for\_matches~-c~-n~1~pat\_file~<~test\_dna\_input
+
+~~~~would~search~for~just~the~first~hit~(and~would~stop~searching~the
+
+~~~~current~sequences~or~any~that~follow~in~the~input~file).
+
+Searching~for~repeats:
+
+~~~~In~the~last~section,~I~discussed~almost~all~of~the~details
+
+~~~~required~to~allow~you~to~look~for~repeats.~~Consider~the~following
+
+~~~~set~of~patterns:
+
+~~~~~~~~p1=6...6~3...8~p1~~~(find~exact~6~character~repeat~separated
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~by~3~to~8~characters)
+
+~~~~~~~~p1=6...6~3...8~p1{[}1,0,0]~~~(allow~one~mismatch)
+
+~~~~~~~~p1=3...3~p1{[}1,0,0]~p1{[}1,0,0]~p1{[}1,0,0]~~
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~(match~12~characters~that~are~the~remains
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~of~a~3-character~sequence~occurring~4~times)
+
+~~~~~~~~~~~~~~~~
+
+~~~~~~~~p1=4...8~0...3~p2=6...8~p1~0...3~p2
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~(This~would~match~things~like
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ATCT~G~TCTTT~ATCT~TG~TCTTT
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~)
+
+Searching~for~particular~sequences:
+
+~~~~Occasionally,~one~wishes~to~match~a~specific,~known~sequence.
+
+~~~~In~such~a~case,~you~can~just~give~the~sequence~(along~with~an
+
+~~~~optional~statement~of~the~allowable~mismatches,~insertions,~and
+
+~~~~deletions).~~Thus,
+
+~~~~~~~~p1=6...8~GAGA~\textasciitilde{}p1~~~~(match~a~hairpin~with~GAGA~as~the~loop)
+
+~~~~~~~~RRRRYYYY~~~~~~~~~~~~~(match~4~purines~followed~by~4~pyrimidines)
+
+~~~~~~~~TATAA{[}1,0,0]~~~~~~~~~(match~TATAA,~allowing~1~mismatch)
+
+~~~~~~~~
+
+Matches~against~a~\char`\"{}weight~matrix\char`\"{}:
+
+~~~~I~will~conclude~my~examples~of~the~types~of~pattern~units
+
+~~~~available~for~matching~against~nucleotide~sequences~by~discussing~a
+
+~~~~crude~implementation~of~matching~using~a~\char`\"{}weight~matrix\char`\"{}.~~While~I
+
+~~~~am~less~than~overwhelmed~with~the~syntax~that~I~chose,~I~think~that
+
+~~~~the~reader~should~be~aware~that~I~was~thinking~of~generating
+
+~~~~patterns~containing~such~pattern~units~automatically~from
+
+~~~~alignments~(and~did~not~really~plan~on~typing~such~things~in~by
+
+~~~~hand~very~often).~~Anyway,~suppose~that~you~wanted~to~match~a
+
+~~~~sequence~of~eight~characters.~~The~\char`\"{}consensus\char`\"{}~of~these~eight
+
+~~~~characters~is~GRCACCGS,~but~the~actual~\char`\"{}frequencies~of~occurrence\char`\"{}
+
+~~~~are~given~in~the~matrix~below.~~Thus,~the~first~character~is~an~A
+
+~~~~16\%~of~the~time~and~a~G~84\%~of~the~time.~~The~second~is~an~A~57\%~of
+
+~~~~the~time,~a~C~10\%~of~the~time,~a~G~29\%~of~the~time,~and~a~T~4\%~of
+
+~~~~the~time.~~
+
+~~~~~~~~~~~~~C1~~~~~C2~~~~C3~~~~C4~~~C5~~~~C6~~~~C7~~~~C8
+
+~~~~
+
+~~~~~~~A~~~~~16~~~~~57~~~~~0~~~~95~~~~0~~~~18~~~~~0~~~~~0
+
+~~~~~~~C~~~~~~0~~~~~10~~~~80~~~~~0~~100~~~~60~~~~~0~~~~50
+
+~~~~~~~G~~~~~84~~~~~29~~~~~0~~~~~0~~~~0~~~~20~~~100~~~~50
+
+~~~~~~~T~~~~~~0~~~~~~4~~~~20~~~~~5~~~~0~~~~~2~~~~~0~~~~~0~~~
+
+~~~~
+
+~~~~One~could~use~the~following~pattern~unit~to~search~for~inexact
+
+~~~~matches~related~to~such~a~\char`\"{}weight~matrix\char`\"{}:
+
+~~~~~~~~\{(16,0,84,0),(57,10,29,4),(0,80,0,20),(95,0,0,5),
+
+~~~~~~~~~(0,100,0,0),(18,60,20,2),(0,0,100,0),(0,50,50,0)\}~>~450
+
+~~~~This~pattern~unit~will~attempt~to~match~exactly~eight~characters.
+
+~~~~For~each~character~in~the~sequence,~the~entry~in~the~corresponding
+
+~~~~tuple~is~added~to~an~accumulated~sum.~~If~the~sum~is~greater~than
+
+~~~~450,~the~match~succeeds;~else~it~fails.
+
+~~~~Recently,~this~feature~was~upgraded~to~allow~ranges.~~Thus,
+
+~~600~>~~\{(16,0,84,0),(57,10,29,4),(0,80,0,20),(95,0,0,5),
+
+~~~~~~~~~(0,100,0,0),(18,60,20,2),(0,0,100,0),(0,50,50,0)\}~>~450
+
+~~~~will~work,~as~well.
+
+Allowing~Alternatives:
+
+~~~~Very~occasionally,~you~may~wish~to~allow~alternative~pattern~units
+
+~~~~(i.e.,~\char`\"{}match~either~A~or~B\char`\"{}).~~You~can~do~this~using~something
+
+~~~~like
+
+~~~~~~~~~~~~~~~~(~GAGA~|~GCGCA)
+
+~~~~which~says~\char`\"{}match~either~GAGA~or~GCGCA\char`\"{}.~~You~may~take
+
+~~~~alternatives~of~a~list~of~pattern~units,~for~example
+
+~~~~~~~~(p1=3...3~3...8~\textasciitilde{}p1~|~p1=5...5~4...4~\textasciitilde{}p1~GGG)
+
+~~~~would~match~one~of~two~sequences~of~pattern~units.~~There~is~one
+
+~~~~clumsy~aspect~of~the~syntax:~to~match~a~list~of~alternatives,~you
+
+~~~~need~to~fully~parenthesize~the~request.~~Thus,
+
+~~~~~~~~(GAGA~|~(GCGCA~|~TTCGA))
+
+~~~~would~be~needed~to~try~the~three~alternatives.
+
+One~Minor~Extension
+
+~~~~Sometimes~a~pattern~will~contain~a~sequence~of~distinct~ranges,
+
+~~~~and~you~might~wish~to~limit~the~sum~of~the~lengths~of~the~matched
+
+~~~~subsequences.~~~For~example,~suppose~that~you~basically~wanted~to
+
+~~~~match~something~like
+
+~~~~ARRYYTT~p1=0...5~GCA{[}1,0,0]~p2=1...6~\textasciitilde{}p1~4...8~\textasciitilde{}p2~p3=4...10~CCT
+
+~~~~but~that~the~sum~of~the~lengths~of~p1,~p2,~and~p3~must~not~exceed
+
+~~~~eight~characters.~~To~do~this,~you~could~add~
+
+~~~~~~~~length(p1+p2+p3)~<~9
+
+~~~~as~the~last~pattern~unit.~~It~will~just~succeed~or~fail~(but~does
+
+~~~~not~actually~match~any~characters~in~the~sequence).
+
+~~~~
+
+Matching~Protein~Sequences
+
+~~~~Suppose~that~the~input~file~contains~protein~sequences.~~In~this
+
+~~~~case,~you~must~invoke~scan\_for\_matches~with~the~\char`\"{}-p\char`\"{}~option.~~You
+
+~~~~cannot~use~aspects~of~the~language~that~relate~directly~to
+
+~~~~nucleotide~sequences~(e.g.,~the~-c~command~line~option~or~pattern
+
+~~~~constructs~referring~to~the~reverse~complement~of~a~previously
+
+~~~~matched~unit).~~
+
+~~~~You~also~have~two~additional~constructs~that~allow~you~to~match
+
+~~~~either~\char`\"{}one~of~a~set~of~amino~acids\char`\"{}~or~\char`\"{}any~amino~acid~other~than
+
+~~~~those~in~a~given~set\char`\"{}.~~For~example,
+
+~~~~~~~~p1=0...4~any(HQD)~1...3~notany(HK)~p1
+
+~~~~would~successfully~match~a~string~like
+
+~~~~~~~~~~~YWV~D~AA~C~YWV
+
+Using~the~show\_hits~Utility
+
+~~~~When~viewing~a~large~set~of~complex~matches,~you~might~find~it
+
+~~~~convenient~to~post-process~the~scan\_for\_matches~output~to~get~a
+
+~~~~more~readable~version.~~We~provide~a~simple~post-processor~called
+
+~~~~\char`\"{}show\_hits\char`\"{}.~~To~see~its~effect,~just~pipe~the~output~of
+
+~~~~scan\_for\_matches~into~show\_hits:
+
+~~~~~Normal~Output:
+
+~~~~~~~~clone\%~scan\_for\_matches~-c~pat\_file~<~tmp
+
+~~~~~~~~>tst1:{[}1,28]
+
+~~~~~~~~gtacguaacc~~ggttaac~cgguuacgtac~
+
+~~~~~~~~>tst1:{[}28,1]
+
+~~~~~~~~gtacgtaacc~~ggttaac~cggttacgtac~
+
+~~~~~~~~>tst2:{[}2,31]
+
+~~~~~~~~CGTACGUAAC~C~GGTTAACC~GGUUACGTACG~
+
+~~~~~~~~>tst2:{[}31,2]
+
+~~~~~~~~CGTACGTAAC~C~GGTTAACC~GGTTACGTACG~
+
+~~~~~~~~>tst3:{[}3,32]
+
+~~~~~~~~gtacguaacc~g~gttaactt~cgguuacgtac~
+
+~~~~~~~~>tst3:{[}32,3]
+
+~~~~~~~~gtacgtaacc~g~aagttaac~cggttacgtac~
+
+~~~~~Piped~Through~show\_hits:
+
+~~~~
+
+~~~~~~~~clone\%~scan\_for\_matches~-c~pat\_file~<~tmp~|~show\_hits
+
+~~~~~~~~tst1:{[}1,28]:~~gtacguaacc~~~ggttaac~~cgguuacgtac
+
+~~~~~~~~tst1:{[}28,1]:~~gtacgtaacc~~~ggttaac~~cggttacgtac
+
+~~~~~~~~tst2:{[}2,31]:~~CGTACGUAAC~C~GGTTAACC~GGUUACGTACG
+
+~~~~~~~~tst2:{[}31,2]:~~CGTACGTAAC~C~GGTTAACC~GGTTACGTACG
+
+~~~~~~~~tst3:{[}3,32]:~~gtacguaacc~g~gttaactt~cgguuacgtac
+
+~~~~~~~~tst3:{[}32,3]:~~gtacgtaacc~g~aagttaac~cggttacgtac
+
+~~~~~~~~clone\%~
+
+~~~~Optionally,~you~can~specify~which~of~the~\char`\"{}fields\char`\"{}~in~the~matches
+
+~~~~you~wish~to~sort~on,~and~show\_hits~will~sort~them.~~The~field
+
+~~~~numbers~start~with~0.~~So,~you~might~get~something~like
+
+~~~~~~~~clone\%~scan\_for\_matches~-c~pat\_file~<~tmp~|~show\_hits~2~1
+
+~~~~~~~~tst2:{[}2,31]:~~CGTACGUAAC~C~GGTTAACC~GGUUACGTACG
+
+~~~~~~~~tst2:{[}31,2]:~~CGTACGTAAC~C~GGTTAACC~GGTTACGTACG
+
+~~~~~~~~tst3:{[}32,3]:~~gtacgtaacc~g~aagttaac~cggttacgtac
+
+~~~~~~~~tst1:{[}1,28]:~~gtacguaacc~~~ggttaac~~cgguuacgtac
+
+~~~~~~~~tst1:{[}28,1]:~~gtacgtaacc~~~ggttaac~~cggttacgtac
+
+~~~~~~~~tst3:{[}3,32]:~~gtacguaacc~g~gttaactt~cgguuacgtac
+
+~~~~~~~~clone\%~
+
+~~~~In~this~case,~the~hits~have~been~sorted~on~fields~2~and~1~(that~is,
+
+~~~~the~third~and~second~matched~subfields).
+
+~~~~show\_hits~is~just~one~possible~little~post-processor,~and~you
+
+~~~~might~well~wish~to~write~a~customized~one~for~yourself.
+
+Reducing~the~Cost~of~a~Search
+
+~~~~The~scan\_for\_matches~utility~uses~a~fairly~simple~search,~and~may
+
+~~~~consume~large~amounts~of~CPU~time~for~complex~patterns.~~Someday,
+
+~~~~I~may~decide~to~optimize~the~code.~~However,~until~then,~let~me
+
+~~~~mention~one~useful~technique.~~
+
+~~~~When~you~have~a~complex~pattern~that~includes~a~number~of~varying
+
+~~~~ranges,~imprecise~matches,~and~so~forth,~it~is~useful~to
+
+~~~~\char`\"{}pipeline\char`\"{}~matches.~~That~is,~form~a~simpler~pattern~that~can~be
+
+~~~~used~to~scan~through~a~large~database~extracting~sections~that
+
+~~~~might~be~matched~by~the~more~complex~pattern.~~Let~me~illustrate
+
+~~~~with~a~short~example.~~Suppose~that~you~really~wished~to~match~the
+
+~~~~pattern~
+
+~~~~p1=3...5~0...8~\textasciitilde{}p1{[}1,1,0]~p2=6...7~3...6~AGC~3...5~RYGC~\textasciitilde{}p2{[}1,0,0]
+
+~~~~In~this~case,~the~pattern~units~AGC~3...5~RYGC~can~be~used~to~rapidly
+
+~~~~constrain~the~overall~search.~~You~can~preprocess~the~overall
+
+~~~~database~using~the~pattern:
+
+~~~~~~~~~~31...31~AGC~3...5~RYGC~7...7
+
+~~~~Put~the~complex~pattern~in~pat\_file1~and~the~simpler~pattern~in
+
+~~~~pat\_file2.~~Then~use,
+
+~~~~~~~~scan\_for\_matches~-c~pat\_file2~<~nucleotide\_database~|
+
+~~~~~~~~scan\_for\_matches~pat\_file1
+
+~~~~The~output~will~show~things~like
+
+~~~~>seqid:{[}232,280]{[}2,47]
+
+~~~~matches~pieces
+
+~~~~Then,~the~actual~section~of~the~sequence~that~was~matched~can~be
+
+~~~~easily~computed~as~{[}233,278]~(remember,~the~positions~start~from
+
+~~~~1,~not~0).
+
+~~~~Let~me~finally~add,~you~should~do~a~few~short~experiments~to~see
+
+~~~~whether~or~not~such~pipelining~actually~improves~performance~-{}-~it
+
+~~~~is~not~always~obvious~where~the~time~is~going,~and~I~have
+
+~~~~sometimes~found~that~the~added~complexity~of~pipelining~actually
+
+~~~~slowed~things~up.~~It~gets~its~best~improvements~when~there~are
+
+~~~~exact~matches~of~more~than~just~a~few~characters~that~can~be
+
+~~~~rapidly~used~to~eliminate~large~sections~of~the~database.
+
+=============
+
+Additions:
+
+Feb~9,~1995:~~~the~pattern~units~\textasciicircum{}~and~\$~now~work~as~in~normal~regular
+
+~~~~~~~~~~~~~~~expressions.~~That~is
+
+~~~~~~~~~~~~~~~~~~~~~~~~TTF~\$
+
+~~~~~~~~~~~~~~~matches~only~TTF~at~the~end~of~the~string~and~
+
+~~~~~~~~~~~~~~~~~~~~~~~~\textasciicircum{}~TTF~
+
+~~~~~~~~~~~~~~~matches~only~an~initial~TTF
+
+~~~~~~~~~~~~~~~The~pattern~unit~
+
+~~~~~~~~~~~~~~~~~~~~~~~~<p1
+
+~~~~~~~~~~~~~~~matches~the~reverse~of~the~string~named~p1.~~That~is,
+
+~~~~~~~~~~~~~~~if~p1~matched~GCAT,~then~<p1~would~match~TACG.~~Thus,
+
+~~~~~~~~~~~~~~~~~~~p1=6...6~<p1
+
+~~~~~~~~~~~~~~~matches~a~real~palindrome~(not~the~biologically~common
+
+~~~~~~~~~~~~~~~meaning~of~\char`\"{}reverse~complement\char`\"{}).
+
+
+\end{lyxcode}
+
+\end{document}
--- /dev/null
+%!PS-Adobe-2.0
+%%Creator: gnuplot 4.2 patchlevel 0
+%%CreationDate: Mon Sep 3 10:26:25 2007
+%%DocumentFonts: (atend)
+%%BoundingBox: 50 50 554 770
+%%Orientation: Landscape
+%%Pages: (atend)
+%%EndComments
+%%BeginProlog
+/gnudict 256 dict def
+gnudict begin
+%
+% The following 6 true/false flags may be edited by hand if required
+% The unit line width may also be changed
+%
+/Color false def
+/Blacktext false def
+/Solid false def
+/Dashlength 1 def
+/Landscape true def
+/Level1 false def
+/Rounded false def
+/TransparentPatterns false def
+/gnulinewidth 5.000 def
+/userlinewidth gnulinewidth def
+%
+/vshift -33 def
+/dl1 {
+ 10.0 Dashlength mul mul
+ Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if
+} def
+/dl2 {
+ 10.0 Dashlength mul mul
+ Rounded { currentlinewidth 0.75 mul add } if
+} def
+/hpt_ 31.5 def
+/vpt_ 31.5 def
+/hpt hpt_ def
+/vpt vpt_ def
+Level1 {} {
+/SDict 10 dict def
+systemdict /pdfmark known not {
+ userdict /pdfmark systemdict /cleartomark get put
+} if
+SDict begin [
+ /Title ()
+ /Subject (gnuplot plot)
+ /Creator (gnuplot 4.2 patchlevel 0)
+ /Author (Martin Hansen)
+% /Producer (gnuplot)
+% /Keywords ()
+ /CreationDate (Mon Sep 3 10:26:25 2007)
+ /DOCINFO pdfmark
+end
+} ifelse
+%
+% Gnuplot Prolog Version 4.2 (August 2006)
+%
+/M {moveto} bind def
+/L {lineto} bind def
+/R {rmoveto} bind def
+/V {rlineto} bind def
+/N {newpath moveto} bind def
+/Z {closepath} bind def
+/C {setrgbcolor} bind def
+/f {rlineto fill} bind def
+/vpt2 vpt 2 mul def
+/hpt2 hpt 2 mul def
+/Lshow {currentpoint stroke M 0 vshift R
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/Rshow {currentpoint stroke M dup stringwidth pop neg vshift R
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def
+ /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def
+/DL {Color {setrgbcolor Solid {pop []} if 0 setdash}
+ {pop pop pop 0 setgray Solid {pop []} if 0 setdash} ifelse} def
+/BL {stroke userlinewidth 2 mul setlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+/AL {stroke userlinewidth 2 div setlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+/UL {dup gnulinewidth mul /userlinewidth exch def
+ dup 1 lt {pop 1} if 10 mul /udl exch def} def
+/PL {stroke userlinewidth setlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+% Default Line colors
+/LCw {1 1 1} def
+/LCb {0 0 0} def
+/LCa {0 0 0} def
+/LC0 {1 0 0} def
+/LC1 {0 1 0} def
+/LC2 {0 0 1} def
+/LC3 {1 0 1} def
+/LC4 {0 1 1} def
+/LC5 {1 1 0} def
+/LC6 {0 0 0} def
+/LC7 {1 0.3 0} def
+/LC8 {0.5 0.5 0.5} def
+% Default Line Types
+/LTw {PL [] 1 setgray} def
+/LTb {BL [] LCb DL} def
+/LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def
+/LT0 {PL [] LC0 DL} def
+/LT1 {PL [4 dl1 2 dl2] LC1 DL} def
+/LT2 {PL [2 dl1 3 dl2] LC2 DL} def
+/LT3 {PL [1 dl1 1.5 dl2] LC3 DL} def
+/LT4 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def
+/LT5 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC5 DL} def
+/LT6 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC6 DL} def
+/LT7 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC7 DL} def
+/LT8 {PL [2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 4 dl2] LC8 DL} def
+/Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def
+/Dia {stroke [] 0 setdash 2 copy vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath stroke
+ Pnt} def
+/Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V
+ currentpoint stroke M
+ hpt neg vpt neg R hpt2 0 V stroke
+ } def
+/Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath stroke
+ Pnt} def
+/Crs {stroke [] 0 setdash exch hpt sub exch vpt add M
+ hpt2 vpt2 neg V currentpoint stroke M
+ hpt2 neg 0 R hpt2 vpt2 V stroke} def
+/TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath stroke
+ Pnt} def
+/Star {2 copy Pls Crs} def
+/BoxF {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath fill} def
+/TriUF {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath fill} def
+/TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath stroke
+ Pnt} def
+/TriDF {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath fill} def
+/DiaF {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath fill} def
+/Pent {stroke [] 0 setdash 2 copy gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath stroke grestore Pnt} def
+/PentF {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath fill grestore} def
+/Circle {stroke [] 0 setdash 2 copy
+ hpt 0 360 arc stroke Pnt} def
+/CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def
+/C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def
+/C1 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 90 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C2 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 90 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C3 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C4 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 180 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C5 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 90 arc
+ 2 copy moveto
+ 2 copy vpt 180 270 arc closepath fill
+ vpt 0 360 arc} bind def
+/C6 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 90 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C7 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C8 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 270 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C9 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 270 450 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 90 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C11 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 180 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 270 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C12 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 180 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C13 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 90 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 180 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C14 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 90 360 arc closepath fill
+ vpt 0 360 arc} bind def
+/C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto
+ neg 0 rlineto closepath} bind def
+/Square {dup Rec} bind def
+/Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def
+/S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def
+/S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def
+/S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def
+/S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def
+/S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def
+/S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill
+ exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def
+/S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def
+/S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill
+ 2 copy vpt Square fill Bsquare} bind def
+/S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def
+/S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def
+/S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill
+ Bsquare} bind def
+/S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill
+ Bsquare} bind def
+/S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def
+/S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill
+ 2 copy vpt Square fill Bsquare} bind def
+/S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill
+ 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def
+/S15 {BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def
+/D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def
+/D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def
+/D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def
+/D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def
+/D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def
+/D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def
+/D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def
+/D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def
+/D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def
+/D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def
+/D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def
+/D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def
+/D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def
+/D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def
+/D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def
+/D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} bind def
+/DiaE {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath stroke} def
+/BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath stroke} def
+/TriUE {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath stroke} def
+/TriDE {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath stroke} def
+/PentE {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath stroke grestore} def
+/CircE {stroke [] 0 setdash
+ hpt 0 360 arc stroke} def
+/Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def
+/DiaW {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V Opaque stroke} def
+/BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V Opaque stroke} def
+/TriUW {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V Opaque stroke} def
+/TriDW {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V Opaque stroke} def
+/PentW {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ Opaque stroke grestore} def
+/CircW {stroke [] 0 setdash
+ hpt 0 360 arc Opaque stroke} def
+/BoxFill {gsave Rec 1 setgray fill grestore} def
+/Density {
+ /Fillden exch def
+ currentrgbcolor
+ /ColB exch def /ColG exch def /ColR exch def
+ /ColR ColR Fillden mul Fillden sub 1 add def
+ /ColG ColG Fillden mul Fillden sub 1 add def
+ /ColB ColB Fillden mul Fillden sub 1 add def
+ ColR ColG ColB setrgbcolor} def
+/BoxColFill {gsave Rec PolyFill} def
+/PolyFill {gsave Density fill grestore grestore} def
+/h {rlineto rlineto rlineto gsave fill grestore} bind def
+%
+% PostScript Level 1 Pattern Fill routine for rectangles
+% Usage: x y w h s a XX PatternFill
+% x,y = lower left corner of box to be filled
+% w,h = width and height of box
+% s = spacing between adjacent hatch lines
+% a = angle in degrees between lines and x-axis
+% XX = 0/1 for no/yes cross-hatch
+%
+/PatternFill {gsave /PFa [ 9 2 roll ] def
+ PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate
+ PFa 2 get -2 div PFa 3 get -2 div PFa 2 get PFa 3 get Rec
+ gsave 1 setgray fill grestore clip
+ currentlinewidth 0.5 mul setlinewidth
+ /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def
+ 0 0 M PFa 5 get rotate PFs -2 div dup translate
+ 0 1 PFs PFa 4 get div 1 add floor cvi
+ {PFa 4 get mul 0 M 0 PFs V} for
+ 0 PFa 6 get ne {
+ 0 1 PFs PFa 4 get div 1 add floor cvi
+ {PFa 4 get mul 0 2 1 roll M PFs 0 V} for
+ } if
+ stroke grestore} def
+%
+/languagelevel where
+ {pop languagelevel} {1} ifelse
+ 2 lt
+ {/InterpretLevel1 true def}
+ {/InterpretLevel1 Level1 def}
+ ifelse
+%
+% PostScript level 2 pattern fill definitions
+%
+/Level2PatternFill {
+/Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 0 8 8] /XStep 8 /YStep 8}
+ bind def
+/KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke}
+>> matrix makepattern
+/Pat1 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke
+ 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke}
+>> matrix makepattern
+/Pat2 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L
+ 8 8 L 8 0 L 0 0 L fill}
+>> matrix makepattern
+/Pat3 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L
+ 0 12 M 12 0 L stroke}
+>> matrix makepattern
+/Pat4 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L
+ 0 -4 M 12 8 L stroke}
+>> matrix makepattern
+/Pat5 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L
+ 0 12 M 8 -4 L 4 12 M 10 0 L stroke}
+>> matrix makepattern
+/Pat6 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L
+ 0 -4 M 8 12 L 4 -4 M 10 8 L stroke}
+>> matrix makepattern
+/Pat7 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L
+ 12 0 M -4 8 L 12 4 M 0 10 L stroke}
+>> matrix makepattern
+/Pat8 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L
+ -4 0 M 12 8 L -4 4 M 8 10 L stroke}
+>> matrix makepattern
+/Pat9 exch def
+/Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def
+/Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def
+/Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def
+/Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def
+/Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def
+/Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def
+/Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def
+} def
+%
+%
+%End of PostScript Level 2 code
+%
+/PatternBgnd {
+ TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse
+} def
+%
+% Substitute for Level 2 pattern fill codes with
+% grayscale if Level 2 support is not selected.
+%
+/Level1PatternFill {
+/Pattern1 {0.250 Density} bind def
+/Pattern2 {0.500 Density} bind def
+/Pattern3 {0.750 Density} bind def
+/Pattern4 {0.125 Density} bind def
+/Pattern5 {0.375 Density} bind def
+/Pattern6 {0.625 Density} bind def
+/Pattern7 {0.875 Density} bind def
+} def
+%
+% Now test for support of Level 2 code
+%
+Level1 {Level1PatternFill} {Level2PatternFill} ifelse
+%
+/Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont
+dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall
+currentdict end definefont pop
+end
+%%EndProlog
+%%Page: 1 1
+gnudict begin
+gsave
+50 50 translate
+0.100 0.100 scale
+90 rotate
+0 -5040 translate
+0 setgray
+newpath
+(Helvetica) findfont 100 scalefont setfont
+1.000 UL
+LTb
+410 660 M
+63 0 V
+6557 0 R
+-63 0 V
+350 660 M
+( 0) Rshow
+1.000 UL
+LTb
+410 1243 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 20) Rshow
+1.000 UL
+LTb
+410 1826 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 40) Rshow
+1.000 UL
+LTb
+410 2409 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 60) Rshow
+1.000 UL
+LTb
+410 2991 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 80) Rshow
+1.000 UL
+LTb
+410 3574 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 100) Rshow
+1.000 UL
+LTb
+410 4157 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 120) Rshow
+1.000 UL
+LTb
+410 4740 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 140) Rshow
+1.000 UL
+LTb
+698 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr1) Rshow
+grestore
+1.000 UL
+LTb
+986 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr2) Rshow
+grestore
+1.000 UL
+LTb
+1273 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr3) Rshow
+grestore
+1.000 UL
+LTb
+1561 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr4) Rshow
+grestore
+1.000 UL
+LTb
+1849 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr5) Rshow
+grestore
+1.000 UL
+LTb
+2137 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr6) Rshow
+grestore
+1.000 UL
+LTb
+2425 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr7) Rshow
+grestore
+1.000 UL
+LTb
+2713 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr8) Rshow
+grestore
+1.000 UL
+LTb
+3000 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr9) Rshow
+grestore
+1.000 UL
+LTb
+3288 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr10) Rshow
+grestore
+1.000 UL
+LTb
+3576 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr11) Rshow
+grestore
+1.000 UL
+LTb
+3864 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr12) Rshow
+grestore
+1.000 UL
+LTb
+4152 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr13) Rshow
+grestore
+1.000 UL
+LTb
+4440 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr14) Rshow
+grestore
+1.000 UL
+LTb
+4727 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr15) Rshow
+grestore
+1.000 UL
+LTb
+5015 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr16) Rshow
+grestore
+1.000 UL
+LTb
+5303 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr17) Rshow
+grestore
+1.000 UL
+LTb
+5591 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr18) Rshow
+grestore
+1.000 UL
+LTb
+5879 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr19) Rshow
+grestore
+1.000 UL
+LTb
+6167 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chrX) Rshow
+grestore
+1.000 UL
+LTb
+6454 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chr5_random) Rshow
+grestore
+1.000 UL
+LTb
+6742 660 M
+0 -60 R
+currentpoint gsave translate 90 rotate 0 0 M
+(chrY_random) Rshow
+grestore
+1.000 UL
+LTb
+1.000 UL
+LTb
+410 4740 N
+410 660 L
+6620 0 V
+0 4080 V
+-6620 0 V
+Z stroke
+3720 4890 M
+(Chromosome Distribution) Cshow
+1.000 UP
+1.000 UL
+LTb
+1.000 UL
+LT0
+/Helvetica findfont 100 scalefont setfont
+1.000 698 660 97 2012 BoxColFill
+698 660 N
+0 2011 V
+96 0 V
+794 660 L
+-96 0 V
+Z stroke
+1.000 986 660 97 4023 BoxColFill
+986 660 N
+0 4022 V
+96 0 V
+0 -4022 V
+-96 0 V
+Z stroke
+1.000 1273 660 97 1196 BoxColFill
+1273 660 N
+0 1195 V
+96 0 V
+0 -1195 V
+-96 0 V
+Z stroke
+1.000 1561 660 97 2332 BoxColFill
+1561 660 N
+0 2331 V
+96 0 V
+0 -2331 V
+-96 0 V
+Z stroke
+1.000 1849 660 97 2857 BoxColFill
+1849 660 N
+0 2856 V
+96 0 V
+0 -2856 V
+-96 0 V
+Z stroke
+1.000 2137 660 97 2187 BoxColFill
+2137 660 N
+0 2186 V
+96 0 V
+0 -2186 V
+-96 0 V
+Z stroke
+1.000 2425 660 97 3148 BoxColFill
+2425 660 N
+0 3147 V
+96 0 V
+0 -3147 V
+-96 0 V
+Z stroke
+1.000 2713 660 97 1021 BoxColFill
+2713 660 N
+0 1020 V
+96 0 V
+0 -1020 V
+-96 0 V
+Z stroke
+1.000 3000 660 97 3178 BoxColFill
+3000 660 N
+0 3177 V
+96 0 V
+0 -3177 V
+-96 0 V
+Z stroke
+1.000 3288 660 97 2274 BoxColFill
+3288 660 N
+0 2273 V
+96 0 V
+0 -2273 V
+-96 0 V
+Z stroke
+1.000 3576 660 97 1371 BoxColFill
+3576 660 N
+0 1370 V
+96 0 V
+0 -1370 V
+-96 0 V
+Z stroke
+1.000 3864 660 97 1954 BoxColFill
+3864 660 N
+0 1953 V
+96 0 V
+0 -1953 V
+-96 0 V
+Z stroke
+1.000 4152 660 97 1458 BoxColFill
+4152 660 N
+0 1457 V
+96 0 V
+0 -1457 V
+-96 0 V
+Z stroke
+1.000 4440 660 97 1400 BoxColFill
+4440 660 N
+0 1399 V
+96 0 V
+0 -1399 V
+-96 0 V
+Z stroke
+1.000 4727 660 97 2041 BoxColFill
+4727 660 N
+0 2040 V
+96 0 V
+0 -2040 V
+-96 0 V
+Z stroke
+1.000 5015 660 97 817 BoxColFill
+5015 660 N
+0 816 V
+96 0 V
+0 -816 V
+-96 0 V
+Z stroke
+1.000 5303 660 97 2566 BoxColFill
+5303 660 N
+0 2565 V
+96 0 V
+0 -2565 V
+-96 0 V
+Z stroke
+1.000 5591 660 97 1400 BoxColFill
+5591 660 N
+0 1399 V
+96 0 V
+0 -1399 V
+-96 0 V
+Z stroke
+1.000 5879 660 97 730 BoxColFill
+5879 660 N
+0 729 V
+96 0 V
+0 -729 V
+-96 0 V
+Z stroke
+1.000 6167 660 96 1138 BoxColFill
+6167 660 N
+0 1137 V
+95 0 V
+0 -1137 V
+-95 0 V
+Z stroke
+1.000 6454 660 97 59 BoxColFill
+6454 660 N
+0 58 V
+96 0 V
+0 -58 V
+-96 0 V
+Z stroke
+1.000 6742 660 97 817 BoxColFill
+6742 660 N
+0 816 V
+96 0 V
+0 -816 V
+-96 0 V
+Z stroke
+1.000 UL
+LTb
+410 4740 N
+410 660 L
+6620 0 V
+0 4080 V
+-6620 0 V
+Z stroke
+1.000 UP
+1.000 UL
+LTb
+stroke
+grestore
+end
+showpage
+%%Trailer
+%%DocumentFonts: Helvetica
+%%Pages: 1
--- /dev/null
+%!PS-Adobe-2.0
+%%Creator: gnuplot 4.2 patchlevel 0
+%%CreationDate: Mon Sep 3 10:34:33 2007
+%%DocumentFonts: (atend)
+%%BoundingBox: 50 50 554 770
+%%Orientation: Landscape
+%%Pages: (atend)
+%%EndComments
+%%BeginProlog
+/gnudict 256 dict def
+gnudict begin
+%
+% The following 6 true/false flags may be edited by hand if required
+% The unit line width may also be changed
+%
+/Color false def
+/Blacktext false def
+/Solid false def
+/Dashlength 1 def
+/Landscape true def
+/Level1 false def
+/Rounded false def
+/TransparentPatterns false def
+/gnulinewidth 5.000 def
+/userlinewidth gnulinewidth def
+%
+/vshift -46 def
+/dl1 {
+ 10.0 Dashlength mul mul
+ Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if
+} def
+/dl2 {
+ 10.0 Dashlength mul mul
+ Rounded { currentlinewidth 0.75 mul add } if
+} def
+/hpt_ 31.5 def
+/vpt_ 31.5 def
+/hpt hpt_ def
+/vpt vpt_ def
+Level1 {} {
+/SDict 10 dict def
+systemdict /pdfmark known not {
+ userdict /pdfmark systemdict /cleartomark get put
+} if
+SDict begin [
+ /Title ()
+ /Subject (gnuplot plot)
+ /Creator (gnuplot 4.2 patchlevel 0)
+ /Author (Martin Hansen)
+% /Producer (gnuplot)
+% /Keywords ()
+ /CreationDate (Mon Sep 3 10:34:33 2007)
+ /DOCINFO pdfmark
+end
+} ifelse
+%
+% Gnuplot Prolog Version 4.2 (August 2006)
+%
+/M {moveto} bind def
+/L {lineto} bind def
+/R {rmoveto} bind def
+/V {rlineto} bind def
+/N {newpath moveto} bind def
+/Z {closepath} bind def
+/C {setrgbcolor} bind def
+/f {rlineto fill} bind def
+/vpt2 vpt 2 mul def
+/hpt2 hpt 2 mul def
+/Lshow {currentpoint stroke M 0 vshift R
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/Rshow {currentpoint stroke M dup stringwidth pop neg vshift R
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def
+ /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def
+/DL {Color {setrgbcolor Solid {pop []} if 0 setdash}
+ {pop pop pop 0 setgray Solid {pop []} if 0 setdash} ifelse} def
+/BL {stroke userlinewidth 2 mul setlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+/AL {stroke userlinewidth 2 div setlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+/UL {dup gnulinewidth mul /userlinewidth exch def
+ dup 1 lt {pop 1} if 10 mul /udl exch def} def
+/PL {stroke userlinewidth setlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+% Default Line colors
+/LCw {1 1 1} def
+/LCb {0 0 0} def
+/LCa {0 0 0} def
+/LC0 {1 0 0} def
+/LC1 {0 1 0} def
+/LC2 {0 0 1} def
+/LC3 {1 0 1} def
+/LC4 {0 1 1} def
+/LC5 {1 1 0} def
+/LC6 {0 0 0} def
+/LC7 {1 0.3 0} def
+/LC8 {0.5 0.5 0.5} def
+% Default Line Types
+/LTw {PL [] 1 setgray} def
+/LTb {BL [] LCb DL} def
+/LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def
+/LT0 {PL [] LC0 DL} def
+/LT1 {PL [4 dl1 2 dl2] LC1 DL} def
+/LT2 {PL [2 dl1 3 dl2] LC2 DL} def
+/LT3 {PL [1 dl1 1.5 dl2] LC3 DL} def
+/LT4 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def
+/LT5 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC5 DL} def
+/LT6 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC6 DL} def
+/LT7 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC7 DL} def
+/LT8 {PL [2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 4 dl2] LC8 DL} def
+% Point symbols.  Every procedure below takes the symbol centre "x y" from
+% the operand stack, first strokes any pending path and resets the dash
+% pattern to solid.  M, V, R, L and hpt/vpt/hpt2/vpt2 are defined earlier in
+% the prolog, outside this excerpt -- presumably moveto, rlineto, rmoveto,
+% lineto and the half/full symbol width and height; confirm there.
+% Pnt: a dot, drawn as a zero-length segment with a round line cap.
+/Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def
+% Dia: diamond outline; "2 copy" keeps x y for the trailing Pnt centre dot.
+/Dia {stroke [] 0 setdash 2 copy vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath stroke
+ Pnt} def
+% Pls: plus sign.  The vertical bar is stroked first; currentpoint is saved
+% before the stroke (which clears the path) so the horizontal bar can be
+% positioned relative to it.
+/Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V
+ currentpoint stroke M
+ hpt neg vpt neg R hpt2 0 V stroke
+ } def
+% Box: square outline plus centre dot.
+/Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath stroke
+ Pnt} def
+% Crs: X-shaped cross made of two separately stroked diagonals.
+/Crs {stroke [] 0 setdash exch hpt sub exch vpt add M
+ hpt2 vpt2 neg V currentpoint stroke M
+ hpt2 neg 0 R hpt2 vpt2 V stroke} def
+% TriU: upward-pointing triangle outline plus centre dot.
+/TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath stroke
+ Pnt} def
+% Star: Pls and Crs drawn at the same centre.
+/Star {2 copy Pls Crs} def
+% BoxF: filled square (no centre dot).
+/BoxF {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath fill} def
+% TriUF: filled upward-pointing triangle.
+/TriUF {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath fill} def
+% TriD: downward-pointing triangle outline plus centre dot.
+/TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath stroke
+ Pnt} def
+% TriDF: filled downward-pointing triangle.
+/TriDF {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath fill} def
+% DiaF: filled diamond.
+/DiaF {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath fill} def
+% Pent: pentagon outline plus centre dot.  The origin is moved to x y inside
+% gsave/grestore and the vertices are produced by four 72-degree rotations.
+/Pent {stroke [] 0 setdash 2 copy gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath stroke grestore Pnt} def
+% PentF: filled pentagon.
+/PentF {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath fill grestore} def
+% Circle: circle outline of radius hpt plus centre dot.
+/Circle {stroke [] 0 setdash 2 copy
+ hpt 0 360 arc stroke Pnt} def
+% CircleF: filled circle of radius hpt.
+/CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def
+% C0..C15: circles of radius vpt around "x y" with selected quadrants filled.
+% Reading the arc angles, the index acts as a bit mask: bit 0 fills 0-90
+% degrees, bit 1 fills 90-180, bit 2 fills 180-270, bit 3 fills 270-360.
+% Each procedure ends by appending the full circle to the current path
+% without stroking it, so the outline is left for the caller to paint.
+% BL is defined earlier in the prolog, outside this excerpt.
+/C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def
+/C1 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 90 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C2 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 90 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C3 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C4 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 180 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+% C5 builds both sectors as subpaths of one path and fills them together;
+% unlike its siblings it does not closepath the final circle.
+/C5 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 90 arc
+ 2 copy moveto
+ 2 copy vpt 180 270 arc closepath fill
+ vpt 0 360 arc} bind def
+/C6 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 90 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C7 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C8 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 270 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+% C9: 270-450 degrees covers the fourth and first quadrants in one arc.
+/C9 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 270 450 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 90 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C11 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 180 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 270 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C12 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 180 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C13 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 0 90 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 180 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+% C14 also omits the final closepath on the outline circle.
+/C14 {BL [] 0 setdash 2 copy moveto
+ 2 copy vpt 90 360 arc closepath fill
+ vpt 0 360 arc} bind def
+% C15: the whole disc is filled.
+/C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+% Rec: "x y w h Rec" starts a new path and builds the closed rectangle with
+% corner x y, width w and height h.  Nothing is painted.
+/Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto
+ neg 0 rlineto closepath} bind def
+% Square: "x y s Square" is Rec with width and height both s.
+/Square {dup Rec} bind def
+% Bsquare: "x y Bsquare" builds the square of side vpt2 whose corner is at
+% (x-vpt, y-vpt).
+/Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def
+% S0..S15: "x y Sn" fills selected vpt-sized quadrants of that square and then
+% leaves the Bsquare outline as the current path, unpainted.  Reading the
+% corner offsets, the index acts as a bit mask: bit 0 = upper right,
+% bit 1 = upper left, bit 2 = lower left, bit 3 = lower right.
+% BL is defined earlier in the prolog, outside this excerpt.
+/S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def
+/S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def
+/S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def
+/S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def
+/S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def
+/S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill
+ exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def
+/S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def
+/S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill
+ 2 copy vpt Square fill Bsquare} bind def
+/S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def
+/S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def
+/S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill
+ Bsquare} bind def
+/S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill
+ Bsquare} bind def
+/S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def
+/S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill
+ 2 copy vpt Square fill Bsquare} bind def
+/S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill
+ 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def
+% S15: the whole square is filled, then rebuilt as the outline path.
+/S15 {BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def
+% D0..D15: "x y Dn" draws the matching Sn symbol rotated by 45 degrees.
+% The origin is moved to x y and rotated inside gsave/grestore, Sn is run at
+% 0 0, and the outline path it leaves behind is stroked.
+/D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def
+/D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def
+/D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def
+/D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def
+/D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def
+/D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def
+/D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def
+/D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def
+/D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def
+/D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def
+/D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def
+/D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def
+/D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def
+/D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def
+/D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def
+/D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} bind def
+% "Empty" symbols: the same outlines as Dia, Box, TriU, TriD, Pent and
+% Circle, but without the trailing Pnt centre dot.  Each takes "x y".
+/DiaE {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath stroke} def
+/BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath stroke} def
+/TriUE {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath stroke} def
+/TriDE {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath stroke} def
+/PentE {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath stroke grestore} def
+/CircE {stroke [] 0 setdash
+ hpt 0 360 arc stroke} def
+% Opaque: closes the current path and fills it white inside gsave/grestore,
+% so the path survives; then sets the colour to black (0 setgray) and closes
+% the path again, ready for the caller to stroke.
+/Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def
+% "White" symbols: the same shapes as the *E symbols, but painted through
+% Opaque, i.e. a white interior with a black outline.  Each takes "x y".
+/DiaW {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V Opaque stroke} def
+/BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V Opaque stroke} def
+/TriUW {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V Opaque stroke} def
+/TriDW {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V Opaque stroke} def
+/PentW {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ Opaque stroke grestore} def
+/CircW {stroke [] 0 setdash
+ hpt 0 360 arc Opaque stroke} def
+% BoxFill: "x y w h BoxFill" fills the rectangle white; the graphics state is
+% restored afterwards.
+/BoxFill {gsave Rec 1 setgray fill grestore} def
+% Density: "d Density" replaces each component c of the current RGB colour
+% with c*d - d + 1, i.e. 1 - d*(1 - c).  d = 1 keeps the colour unchanged and
+% d = 0 gives white.  Fillden, ColR, ColG and ColB are stored as scratch
+% entries in the current dictionary.
+/Density {
+ /Fillden exch def
+ currentrgbcolor
+ /ColB exch def /ColG exch def /ColR exch def
+ /ColR ColR Fillden mul Fillden sub 1 add def
+ /ColG ColG Fillden mul Fillden sub 1 add def
+ /ColB ColB Fillden mul Fillden sub 1 add def
+ ColR ColG ColB setrgbcolor} def
+% BoxColFill: opens a gsave and builds the rectangle; the matching grestore is
+% the second one inside PolyFill.
+/BoxColFill {gsave Rec PolyFill} def
+% PolyFill: "d PolyFill" fills the current path at density d.  It executes two
+% grestores: one for its own gsave and one for a gsave the caller must have
+% issued beforehand (as BoxColFill does).
+/PolyFill {gsave Density fill grestore grestore} def
+% h: appends three relative line segments (six operands, consumed top-first)
+% and fills the path inside gsave/grestore, so the path itself is kept.
+/h {rlineto rlineto rlineto gsave fill grestore} bind def
+%
+% PostScript Level 1 Pattern Fill routine for rectangles
+% Usage: x y w h s a XX PatternFill
+% x,y = lower left corner of box to be filled
+% w,h = width and height of box
+% s = spacing between adjacent hatch lines
+% a = angle in degrees between lines and x-axis
+% XX = 0/1 for no/yes cross-hatch
+%
+% The seven operands are collected into the array PFa, so PFa 0..6 are
+% x y w h s a XX in that order.
+/PatternFill {gsave /PFa [ 9 2 roll ] def
+% Move the origin to the centre of the box.
+ PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate
+% Build the box around the new origin, paint it white, then clip to it.
+ PFa 2 get -2 div PFa 3 get -2 div PFa 2 get PFa 3 get Rec
+ gsave 1 setgray fill grestore clip
+ currentlinewidth 0.5 mul setlinewidth
+% PFs is the box diagonal; it is used as the length of every hatch line.
+ /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def
+ 0 0 M PFa 5 get rotate PFs -2 div dup translate
+% First family of lines, one every s units across the diagonal.
+ 0 1 PFs PFa 4 get div 1 add floor cvi
+ {PFa 4 get mul 0 M 0 PFs V} for
+% Second, perpendicular family, drawn only when XX is non-zero.
+ 0 PFa 6 get ne {
+ 0 1 PFs PFa 4 get div 1 add floor cvi
+ {PFa 4 get mul 0 2 1 roll M PFs 0 V} for
+ } if
+ stroke grestore} def
+%
+% InterpretLevel1 is forced to true when the interpreter reports a language
+% level below 2 (or does not define languagelevel at all); otherwise it
+% takes the value of Level1, which is defined earlier in the prolog.
+% NOTE(review): within this excerpt the pattern-fill selection further down
+% tests Level1, not InterpretLevel1 -- confirm where this flag is consumed.
+/languagelevel where
+ {pop languagelevel} {1} ifelse
+ 2 lt
+ {/InterpretLevel1 true def}
+ {/InterpretLevel1 Level1 def}
+ ifelse
+%
+% PostScript level 2 pattern fill definitions
+%
+% Level2PatternFill only defines things when it is executed: Tile8x8,
+% KeepColor, the pattern objects Pat1..Pat9 and the procedures
+% Pattern1..Pattern7.
+/Level2PatternFill {
+% Tile8x8 pushes the key/value pairs shared by every pattern dictionary:
+% an uncoloured (PaintType 2) tiling pattern with an 8x8 cell.
+/Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 0 8 8] /XStep 8 /YStep 8}
+ bind def
+% KeepColor leaves the current R G B on the stack and selects a Pattern
+% colour space over DeviceRGB; setpattern then consumes those components
+% together with the pattern.
+/KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def
+% In every PaintProc the leading "pop" discards the pattern dictionary the
+% interpreter passes in.
+% Pat1: both diagonals of the cell.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke}
+>> matrix makepattern
+/Pat1 exch def
+% Pat2: both diagonals plus a diamond through the edge midpoints.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke
+ 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke}
+>> matrix makepattern
+/Pat2 exch def
+% Pat3: the whole cell filled.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L
+ 8 8 L 8 0 L 0 0 L fill}
+>> matrix makepattern
+/Pat3 exch def
+% Pat4: lines of slope -1.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L
+ 0 12 M 12 0 L stroke}
+>> matrix makepattern
+/Pat4 exch def
+% Pat5: lines of slope +1.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L
+ 0 -4 M 12 8 L stroke}
+>> matrix makepattern
+/Pat5 exch def
+% Pat6: lines of slope -2.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L
+ 0 12 M 8 -4 L 4 12 M 10 0 L stroke}
+>> matrix makepattern
+/Pat6 exch def
+% Pat7: lines of slope +2.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L
+ 0 -4 M 8 12 L 4 -4 M 10 8 L stroke}
+>> matrix makepattern
+/Pat7 exch def
+% Pat8: lines of slope -1/2.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L
+ 12 0 M -4 8 L 12 4 M 0 10 L stroke}
+>> matrix makepattern
+/Pat8 exch def
+% Pat9: lines of slope +1/2.
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L
+ -4 0 M 12 8 L -4 4 M 8 10 L stroke}
+>> matrix makepattern
+/Pat9 exch def
+% Pattern1..Pattern7 paint the background through PatternBgnd and then
+% select a pattern in the current colour.  Pattern4..Pattern7 pick a
+% different tile when Landscape is true.  PatternBgnd is looked up by name
+% when these run, so it only has to exist by then.
+/Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def
+/Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def
+/Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def
+/Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def
+/Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def
+/Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def
+/Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def
+} def
+%
+%
+%End of PostScript Level 2 code
+%
+% PatternBgnd: unless TransparentPatterns is true, fills the current path
+% white inside gsave/grestore, so the path and colour are left unchanged.
+% TransparentPatterns is defined earlier in the prolog.
+/PatternBgnd {
+ TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse
+} def
+%
+% Substitute for Level 2 pattern fill codes with
+% grayscale if Level 2 support is not selected.
+% When executed, Level1PatternFill defines Pattern1..Pattern7 as calls to
+% Density with a fixed fill density each, instead of real tiling patterns.
+%
+/Level1PatternFill {
+/Pattern1 {0.250 Density} bind def
+/Pattern2 {0.500 Density} bind def
+/Pattern3 {0.750 Density} bind def
+/Pattern4 {0.125 Density} bind def
+/Pattern5 {0.375 Density} bind def
+/Pattern6 {0.625 Density} bind def
+/Pattern7 {0.875 Density} bind def
+} def
+%
+% Now test for support of Level 2 code
+% Runs exactly one of the two definers, so Pattern1..Pattern7 exist in one
+% flavour only.  The test uses Level1 (defined earlier in the prolog).
+%
+Level1 {Level1PatternFill} {Level2PatternFill} ifelse
+%
+% Define the font Symbol-Oblique: take Symbol, apply the shear matrix
+% [1 0 .167 1 0 0] with makefont, copy every entry except FID into a fresh
+% dictionary and register that copy with definefont.
+/Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont
+dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall
+currentdict end definefont pop
+end
+%%EndProlog
+%%Page: 1 1
+% Page setup.  gnudict is defined earlier in the prolog, outside this excerpt.
+gnudict begin
+gsave
+50 50 translate % shift the origin by 50 units in x and y
+0.100 0.100 scale % plot coordinates are tenths of the default unit
+90 rotate % rotate the plot a quarter turn ...
+0 -5040 translate % ... and shift it along the rotated y axis
+0 setgray
+newpath
+(Helvetica) findfont 140 scalefont setfont
+gsave % colour palette begin
+/maxcolors 0 def
+% HSV2RGB: "h s v HSV2RGB" leaves three colour components on the stack.
+% With s = 0 the result is v v v.  Otherwise h is multiplied by 6 and split
+% into an integer sextant HSVi (taken mod 6) and a fraction HSVf; the
+% intermediates are
+%   HSVp = v*(1 - s)   HSVq = v*(1 - s*f)   HSVt = v*(1 - s*(1 - f))
+% and the sextant selects which of v, p, q, t goes to each output.
+% All HSV* names are scratch entries in the current dictionary.
+/HSV2RGB { exch dup 0.0 eq {pop exch pop dup dup} % achromatic gray
+ { /HSVs exch def /HSVv exch def 6.0 mul dup floor dup 3 1 roll sub
+ /HSVf exch def /HSVi exch cvi def /HSVp HSVv 1.0 HSVs sub mul def
+ /HSVq HSVv 1.0 HSVs HSVf mul sub mul def
+ /HSVt HSVv 1.0 HSVs 1.0 HSVf sub mul sub mul def
+ /HSVi HSVi 6 mod def 0 HSVi eq {HSVv HSVt HSVp}
+ {1 HSVi eq {HSVq HSVv HSVp}{2 HSVi eq {HSVp HSVv HSVt}
+ {3 HSVi eq {HSVp HSVq HSVv}{4 HSVi eq {HSVt HSVp HSVv}
+ {HSVv HSVp HSVq} ifelse} ifelse} ifelse} ifelse} ifelse
+ } ifelse} def
+% Constrain: clamps the number on top of the stack to the range 0..1;
+% values below 0 become 0, values above 1 become 1.
+/Constrain {
+ dup 0 lt {0 exch pop}{dup 1 gt {1 exch pop} if} ifelse} def
+% YIQ2RGB: replaces the three operands on the stack with three linear
+% combinations of them, each clamped by Constrain.  "3 copy" keeps the
+% inputs for the next combination and the roll operators tuck each result
+% underneath them.
+/YIQ2RGB {
+ 3 copy -1.702 mul exch -1.105 mul add add Constrain 4 1 roll
+ 3 copy -0.647 mul exch -0.272 mul add add Constrain 5 1 roll
+ 0.621 mul exch -0.956 mul add add Constrain 3 1 roll } def
+% CMY2RGB: replaces each of the three operands x with 1 - x, keeping their
+% order on the stack.
+/CMY2RGB { 1 exch sub exch 1 exch sub 3 2 roll 1 exch sub 3 1 roll exch } def
+% XYZ2RGB: replaces the three operands on the stack with three linear
+% combinations of them (fixed coefficients below), each clamped by Constrain.
+/XYZ2RGB { 3 copy -0.9017 mul exch -0.1187 mul add exch 0.0585 mul exch add
+ Constrain 4 1 roll 3 copy -0.0279 mul exch 1.999 mul add exch
+ -0.9844 mul add Constrain 5 1 roll -0.2891 mul exch -0.5338 mul add
+ exch 1.91 mul exch add Constrain 3 1 roll} def
+% SelectSpace: converts the three components on the stack according to the
+% string ColorSpace: (HSV), (XYZ), (CMY) or (YIQ).  Any other value, such as
+% (RGB), leaves them untouched because the innermost test is a plain "if".
+/SelectSpace {ColorSpace (HSV) eq {HSV2RGB}{ColorSpace (XYZ) eq {
+ XYZ2RGB}{ColorSpace (CMY) eq {CMY2RGB}{ColorSpace (YIQ) eq {YIQ2RGB}
+ if} ifelse} ifelse} ifelse} def
+% Palette settings for this plot: the colour comes from the three formulas
+% cF7, cF5 and cF15 rather than from an interpolated table.
+/InterpolatedColor false def
+/cF7 {sqrt} bind def % sqrt(x)
+/cF5 {dup dup mul mul} bind def % x^3
+/cF15 {360 mul sin} bind def % sin(360x)
+% pm3dround: when maxcolors > 0, quantises the value on the stack: values
+% >= 1 become 1, others become floor(v*maxcolors)/(maxcolors-1).  With
+% maxcolors = 0 the value passes through unchanged.
+/pm3dround {maxcolors 0 gt {dup 1 ge
+ {pop 1} {maxcolors mul floor maxcolors 1 sub div} ifelse} if} def
+/pm3dGamma 1.0 1.5 div def % exponent used by the gray-scale variant of g
+/ColorSpace (RGB) def
+% Define g, which sets the drawing colour from one palette value on the
+% stack.  Every variant first strokes the pending path and applies
+% pm3dround.  Color is defined earlier in the prolog, outside this excerpt.
+Color true and { % COLOUR vs. GRAY map
+ InterpolatedColor { %% Interpolation vs. RGB-Formula
+% Interpolated variant: stores the value as grayv and calls interpolate,
+% which is not defined in this excerpt.
+ /g {stroke pm3dround /grayv exch def interpolate
+ SelectSpace setrgbcolor} bind def
+ }{
+% Formula variant: the three components are cF7, cF5 and cF15 of the value,
+% each clamped by Constrain, then passed through SelectSpace.
+ /g {stroke pm3dround dup cF7 Constrain exch dup cF5 Constrain exch cF15 Constrain
+ SelectSpace setrgbcolor} bind def
+ } ifelse
+}{
+% Gray variant: the value raised to the power pm3dGamma is the gray level.
+ /g {stroke pm3dround pm3dGamma exp setgray} bind def
+} ifelse
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 483 M
+5849 0 V
+stroke
+LTb
+1113 483 M
+-63 0 V
+5912 0 R
+63 0 V
+966 483 M
+( 0) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 979 M
+5849 0 V
+stroke
+LTb
+1113 979 M
+-63 0 V
+5912 0 R
+63 0 V
+966 979 M
+( 200000) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 1475 M
+5849 0 V
+stroke
+LTb
+1113 1475 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 400000) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 1971 M
+5849 0 V
+stroke
+LTb
+1113 1971 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 600000) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 2467 M
+5849 0 V
+stroke
+LTb
+1113 2467 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 800000) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 2963 M
+5849 0 V
+stroke
+LTb
+1113 2963 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 1e+06) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 3460 M
+5849 0 V
+stroke
+LTb
+1113 3460 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 1.2e+06) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 3956 M
+5849 0 V
+stroke
+LTb
+1113 3956 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 1.4e+06) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 4452 M
+5849 0 V
+stroke
+LTb
+1113 4452 M
+-63 0 V
+5912 0 R
+63 0 V
+-6059 0 R
+( 1.6e+06) Rshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1113 483 M
+0 4137 V
+stroke
+LTb
+1113 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 0) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+1825 483 M
+0 4137 V
+stroke
+LTb
+1825 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 200000) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+2537 483 M
+0 4137 V
+stroke
+LTb
+2537 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 400000) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+3249 483 M
+0 4137 V
+stroke
+LTb
+3249 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 600000) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+3961 483 M
+0 4137 V
+stroke
+LTb
+3961 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 800000) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+4673 483 M
+0 4137 V
+stroke
+LTb
+4673 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 1e+06) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+5385 483 M
+0 4137 V
+stroke
+LTb
+5385 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 1.2e+06) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+6096 483 M
+0 4137 V
+stroke
+LTb
+6096 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 1.4e+06) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTa
+6808 483 M
+0 4137 V
+stroke
+LTb
+6808 483 M
+0 -63 V
+0 4200 R
+0 63 V
+0 -4403 R
+( 1.6e+06) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTb
+1113 4620 N
+0 -4137 V
+5849 0 V
+0 4137 V
+-5849 0 V
+Z stroke
+LCb setrgbcolor
+140 2551 M
+currentpoint gsave translate 90 rotate 0 0 M
+(gi|6626253|gb|AE000511.1|_Helicobacter_pylori_26695,_complete_genome) Cshow
+grestore
+LTb
+LCb setrgbcolor
+4037 70 M
+(gi|12057207|gb|AE001439.1|_Helicobacter_pylori_J99,_complete_genome ) Cshow
+LTb
+4037 4830 M
+(plot_matches) Cshow
+1.000 UP
+1.000 UL
+LTb
+2.000 UL
+LT0
+0.00 1.00 0.00 C /Helvetica findfont 140 scalefont setfont
+1113 483 M
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+-1 1661 R
+1 -1660 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+0 1 R
+0 -1 R
+0 3169 R
+0 -3169 R
+1 0 V
+0 3170 R
+0 -3170 R
+0 1 V
+0 -1 R
+0 1 R
+1 0 V
+0 3255 R
+0 -3254 R
+1 0 V
+0 3254 R
+0 1 R
+0 -919 R
+0 -1527 R
+1 1 R
+0 2445 R
+0 -918 R
+0 -2337 R
+0 2337 R
+0 -1527 R
+0 2446 R
+0 -919 R
+0 -2337 R
+1 0 V
+0 45 R
+0 534 R
+0 -9 R
+0 -525 R
+0 2293 R
+0 -1768 R
+0 1350 R
+0 -1920 R
+0 1 V
+0 2337 R
+0 918 R
+0 -2445 R
+0 -810 R
+1 0 R
+0 570 R
+0 -570 R
+0 2338 R
+0 918 R
+0 -2446 R
+0 1109 R
+0 -1919 R
+0 3256 R
+0 -918 R
+0 -1528 R
+0 1 V
+0 -811 R
+1 1 V
+1 0 V
+0 1 V
+0 3543 R
+0 -3543 R
+0 810 R
+1 0 V
+-1 -810 R
+1 0 V
+0 1 R
+1 0 V
+0 3256 R
+0 -3256 R
+1 0 V
+-1 45 R
+1 -45 R
+0 1 R
+0 3071 R
+0 -734 R
+0 -2292 R
+0 765 R
+0 2446 R
+0 -3211 R
+0 1874 R
+0 -1919 R
+1 0 V
+-1 2338 R
+1 -2338 R
+0 2338 R
+0 -2338 R
+1140 502 L
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+0 1146 R
+0 -1146 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+0 617 R
+0 -617 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+0 3671 R
+0 272 R
+0 -3943 R
+1 3943 R
+0 -3943 R
+0 1 V
+1 765 R
+0 2445 R
+0 -918 R
+0 -2337 R
+0 3256 R
+0 -2446 R
+0 -765 R
+0 3027 R
+0 -735 R
+1197 2834 L
+0 -2338 R
+0 579 R
+0 -9 R
+0 -525 R
+0 525 R
+0 -525 R
+0 2293 R
+1 0 V
+-1 918 R
+1 0 V
+-1 -2445 R
+1 0 V
+-1 -232 R
+1 1 V
+-1 -580 R
+1 1 V
+0 44 R
+0 1 V
+0 3026 R
+0 -3026 R
+0 2292 R
+0 919 R
+0 -2446 R
+0 1109 R
+0 -1874 R
+1 0 V
+-1 534 R
+1 0 V
+-1 2677 R
+1 0 V
+-1 -918 R
+1 0 V
+-1 -1528 R
+1 0 V
+0 -765 R
+0 1 V
+1 0 R
+0 765 R
+0 1528 R
+0 -2293 R
+1 0 V
+0 1 V
+1 1 R
+0 2292 R
+1 1769 R
+0 -4061 R
+0 2293 R
+0 918 R
+0 -2445 R
+0 -766 R
+0 -45 R
+0 3257 R
+0 -3212 R
+0 1 V
+1 0 R
+0 2292 R
+0 -2337 R
+0 45 R
+0 1874 R
+0 -1874 R
+0 -45 R
+0 45 R
+1 0 V
+0 1 V
+0 1873 R
+0 1688 R
+0 -3561 R
+1 0 R
+0 3717 R
+0 -664 R
+0 -2694 R
+0 735 R
+1 -1093 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+3 1018 R
+1 -1015 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 1 R
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1235 567 L
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+0 3260 R
+0 -3260 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+0 2200 R
+0 -2200 R
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 4 R
+0 1 V
+1 0 V
+8 0 R
+1 9 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1307 625 L
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+0 2314 R
+0 -2314 R
+0 1181 R
+0 -1181 R
+1 0 V
+0 1 V
+1 0 V
+1 3 R
+0 -2 R
+0 2 R
+0 -2 R
+1 2 R
+0 -2 R
+1 1 R
+1 0 R
+0 1 R
+2 1 R
+1 1 R
+1 0 V
+0 2749 R
+0 -2749 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1373 670 L
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 R
+0 2684 R
+0 1 V
+1 -2685 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 3 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 1 V
+0 55 R
+0 1303 R
+0 -1358 R
+1 0 V
+0 1 V
+1 0 V
+0 3879 R
+0 -3879 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 2721 R
+0 -703 R
+0 -2018 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+1436 718 L
+1 0 V
+0 2478 R
+0 1 V
+0 -2479 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+0 666 R
+0 -666 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 3069 R
+0 -3069 R
+1 1 V
+0 1537 R
+0 -1537 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1303 R
+0 -1358 R
+0 55 R
+0 1 V
+0 -56 R
+0 1358 R
+0 -1302 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1495 758 L
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+0 2857 R
+0 -2857 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1580 R
+0 -1580 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+0 3264 R
+1 -3264 R
+1 0 V
+0 1 R
+1 0 V
+0 2800 R
+0 4 R
+0 -2800 R
+0 4 R
+0 2792 R
+0 4 R
+0 -2800 R
+0 4 R
+0 -8 R
+1554 801 L
+0 2799 R
+1 1 V
+-1 3 R
+1 0 V
+1554 804 M
+1 0 V
+-1 4 R
+1 0 V
+-1 -7 R
+1 0 V
+0 2803 R
+0 -3 R
+0 -2797 R
+0 4 R
+0 1 V
+0 2795 R
+0 -2803 R
+0 3 R
+0 1 V
+0 4 R
+0 2795 R
+0 -3 R
+0 -2800 R
+0 4 R
+0 4 R
+0 2792 R
+0 -2800 R
+0 4 R
+0 4 R
+0 2795 R
+0 1 V
+0 -4 R
+1 0 V
+1555 805 M
+0 4 R
+0 -8 R
+1 0 V
+-1 2804 R
+0 -2800 R
+0 4 R
+0 -4 R
+1 0 V
+-1 4 R
+1 0 V
+-1 2796 R
+1 0 V
+0 -2803 R
+0 2800 R
+0 -2797 R
+0 2800 R
+0 -2796 R
+0 1 V
+1 0 V
+-1 2792 R
+0 -2797 R
+0 1 R
+1 2799 R
+0 -2795 R
+0 -4 R
+0 2799 R
+0 -3 R
+0 -2792 R
+0 2795 R
+0 -2795 R
+0 2792 R
+0 -2800 R
+0 2800 R
+0 -2800 R
+0 8 R
+1 0 V
+-1 2795 R
+0 1 V
+1 0 V
+-1 -4 R
+1 1 V
+1557 802 M
+1 1 V
+0 2803 R
+0 -2796 R
+0 1 V
+0 2792 R
+0 -2800 R
+0 2803 R
+0 -3 R
+0 -2800 R
+0 8 R
+0 2795 R
+0 -2795 R
+0 2795 R
+1 0 V
+-1 -3 R
+1 0 V
+1558 803 M
+1 0 V
+-1 8 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1566 816 L
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 1121 R
+1 0 V
+0 -860 R
+0 -261 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 2 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+1 0 R
+0 1 R
+0 639 R
+1 -639 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+0 4 R
+0 -4 R
+0 4 R
+0 -4 R
+1 0 V
+0 4 R
+0 -4 R
+0 1 V
+1 4 R
+0 -4 R
+1 0 R
+0 1 V
+0 4 R
+0 -4 R
+1628 859 L
+0 1 R
+1 0 V
+-1 4 R
+1 -4 R
+1 1 V
+0 -4 R
+0 4 R
+1 0 V
+0 -4 R
+0 4 R
+0 -4 R
+0 4 R
+0 1 V
+0 -5 R
+0 1 V
+0 4 R
+1 0 V
+0 -4 R
+0 4 R
+1 1 V
+0 -4 R
+0 4 R
+1 0 V
+0 1 V
+0 -5 R
+0 5 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+3 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1686 898 L
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+0 -1 R
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1951 R
+1 -1950 R
+0 1982 R
+1 -1981 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 2068 R
+1 -2068 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 831 R
+0 -831 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+1 1 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+11 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1760 943 L
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1056 R
+0 -1056 R
+0 3350 R
+0 -1931 R
+0 -1419 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1824 988 L
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+2 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 3305 R
+0 -3305 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1389 R
+0 -1389 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1679 R
+0 -1679 R
+0 1 V
+0 2255 R
+0 -2255 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+0 2332 R
+0 -2332 R
+1 0 V
+0 2333 R
+0 -2333 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1884 1029 L
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1628 R
+0 -1628 R
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1188 R
+0 -1188 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1792 R
+0 572 R
+0 -702 R
+0 -1662 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1515 R
+0 -1514 R
+0 -525 R
+0 -45 R
+0 3072 R
+0 -1153 R
+0 1337 R
+0 -2446 R
+0 1527 R
+0 1 V
+0 -1759 R
+0 2493 R
+1939 3568 L
+0 -2502 R
+0 1 V
+0 2501 R
+0 -2501 R
+0 1767 R
+0 919 R
+0 -2446 R
+0 -240 R
+1 0 V
+-1 2502 R
+1 0 V
+-1 -735 R
+1 0 V
+0 -1767 R
+0 -570 R
+0 3072 R
+0 -3072 R
+0 3256 R
+0 -918 R
+0 -1528 R
+0 -765 R
+0 534 R
+0 -9 R
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+0 2501 R
+0 -734 R
+0 -2337 R
+0 570 R
+1 0 R
+0 -525 R
+0 525 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 -9 R
+0 9 R
+0 1341 R
+0 -1341 R
+0 1 V
+1 0 V
+0 -9 R
+0 9 R
+0 -534 R
+0 3211 R
+0 -918 R
+0 -1528 R
+0 -231 R
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 233 R
+0 2446 R
+0 -2679 R
+0 -577 R
+0 577 R
+0 1 V
+0 1341 R
+0 418 R
+1 -1759 R
+0 1341 R
+0 1152 R
+0 -2493 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+0 251 R
+0 -251 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 3355 R
+0 -2043 R
+0 1220 R
+0 -2531 R
+0 1703 R
+0 -1703 R
+1973 1090 L
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 419 R
+0 -419 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1296 R
+0 -1294 R
+0 1 V
+0 -3 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+0 -3 R
+0 1 V
+0 2 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+2034 1132 L
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+-1 1929 R
+1 0 V
+0 -1929 R
+0 1 V
+0 1928 R
+0 -1928 R
+0 1928 R
+0 -1928 R
+0 1928 R
+0 1 V
+0 -1929 R
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+0 1925 R
+0 -1925 R
+0 1925 R
+0 -1925 R
+0 1925 R
+0 -1925 R
+1 0 V
+0 1 V
+1 0 V
+-1 -617 R
+1 617 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 -595 R
+0 595 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+2089 1171 L
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 -338 R
+0 338 R
+0 1 R
+1 0 V
+0 1577 R
+0 -1577 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+0 2173 R
+0 -2173 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 3369 R
+0 -3368 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+2152 1214 L
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+2 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 -124 R
+0 124 R
+0 1 V
+1 0 V
+1 1 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+2218 1260 L
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 810 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+2287 2117 L
+0 -805 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1584 R
+1 -173 R
+0 173 R
+0 -1583 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+3 1 R
+1 0 V
+0 1 R
+0 2352 R
+0 -2563 R
+0 211 R
+1 0 R
+0 1 V
+1 0 V
+1 9 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+2355 1367 L
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 -300 R
+1 301 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+38 2926 R
+72 144 R
+13 -838 R
+38 -2238 R
+0 1 V
+17 1754 R
+0 -2456 R
+22 1310 R
+7 1398 R
+2617 580 M
+42 2455 R
+29 -1362 R
+1 0 V
+0 1 V
+1 0 V
+2 1 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+-1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+2 1 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 2436 R
+1 -2436 R
+0 1 V
+1 0 R
+2710 1687 L
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+0 1727 R
+0 1 V
+0 -1934 R
+0 206 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+0 1210 R
+1 -1210 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1211 R
+1 -1210 R
+0 1210 R
+0 -1210 R
+0 1210 R
+1 -1210 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1 V
+0 1210 R
+0 -1210 R
+0 1210 R
+1 -1210 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+2761 1724 L
+0 1210 R
+0 -1210 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 4 R
+0 -3 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+2 2 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 R
+1 -5 R
+1 0 V
+0 1 R
+1 5 R
+1 1 R
+1 2 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 2284 R
+0 -2284 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+2827 1771 L
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 1 V
+1 0 V
+1 0 V
+0 1 R
+0 1073 R
+0 -1073 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+0 2768 R
+0 -2768 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+0 -1 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+2892 1815 L
+0 1 V
+2 1 R
+0 -1181 R
+1 1434 R
+1 1357 R
+0 324 R
+0 -3045 R
+1 1112 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 -615 R
+0 2065 R
+0 -1450 R
+1 0 V
+1 1 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+1 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+2963 1862 L
+0 3 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 -2 R
+0 1 R
+0 1 R
+0 3 R
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+1 3 R
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+0 3 R
+0 -1 R
+0 -1 R
+0 1 R
+0 -3 R
+0 2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -5 R
+0 1 R
+0 1 R
+0 3 R
+0 -2 R
+0 1 V
+0 -1 R
+0 -2 R
+0 -1 R
+0 2 R
+0 -2 R
+0 1 R
+0 -1 R
+0 1 R
+0 4 R
+0 -2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -2 R
+0 1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+1 -1 R
+0 1 V
+0 3 R
+0 -1 R
+0 -2 R
+0 1 V
+0 2 R
+0 -1 R
+0 1 V
+0 -2 R
+0 2 R
+0 -3 R
+0 1 R
+0 2 R
+0 1 R
+0 -2 R
+0 1 R
+0 -5 R
+0 1 R
+0 1 R
+0 1 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 3 R
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+0 1 R
+1 0 V
+-1 -1 R
+1 0 V
+-1 2 R
+1 0 V
+-1 1 R
+1 0 V
+-1 -3 R
+1 0 V
+-1 -2 R
+1 0 V
+-1 1 R
+1 0 V
+-1 1 R
+1 0 V
+-1 3 R
+1 0 V
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+0 1 R
+0 -1 R
+0 2 R
+0 -3 R
+0 2 R
+0 2 R
+0 -3 R
+2966 1865 L
+0 -3 R
+0 5 R
+0 -1 R
+0 -3 R
+0 1 V
+0 1 R
+0 1 R
+0 1 V
+0 -2 R
+0 2 R
+0 -3 R
+0 1 R
+0 2 R
+0 1 R
+0 -2 R
+1 -1 R
+0 -1 R
+0 -2 R
+0 1 R
+0 2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 3 R
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+0 1 R
+0 -1 R
+0 2 R
+0 -3 R
+0 2 R
+0 2 R
+0 -2 R
+0 1 V
+0 -1 R
+0 -2 R
+0 -1 R
+0 2 R
+0 3 R
+0 -5 R
+0 1 R
+0 1 R
+0 1 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 3 R
+1 0 V
+-1 -1 R
+1 0 V
+-1 -3 R
+1 0 V
+-1 -1 R
+1 0 V
+-1 1 R
+1 0 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+0 1 R
+0 -1 R
+0 2 R
+0 -3 R
+0 2 R
+0 2 R
+0 -2 R
+0 1 V
+0 -1 R
+0 -2 R
+0 -1 R
+0 2 R
+0 3 R
+0 -5 R
+0 1 R
+0 1 R
+0 1 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 3 R
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+1 1 R
+0 -1 R
+0 2 R
+0 -3 R
+0 2 R
+0 2 R
+0 -3 R
+0 1 V
+0 -3 R
+0 5 R
+0 -1 R
+0 -3 R
+0 2 R
+0 2 R
+0 1 R
+0 -2 R
+0 -2 R
+0 1 R
+0 -1 R
+0 -2 R
+0 1 R
+0 2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+1 3 R
+0 -2 R
+2970 1866 L
+0 -1 R
+0 1 R
+0 1 R
+0 -1 R
+0 -1 R
+0 1 R
+0 -3 R
+0 2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -5 R
+0 1 R
+0 1 R
+0 3 R
+0 -2 R
+0 1 V
+0 -1 R
+0 -2 R
+0 -1 R
+0 1 R
+0 1 R
+0 1 R
+0 -1 R
+0 2 R
+0 -4 R
+0 1 R
+0 2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 2 R
+0 -1 R
+0 1 R
+0 -3 R
+0 2 R
+0 -1 R
+0 2 R
+0 -2 R
+0 3 R
+1 0 V
+0 -5 R
+0 5 R
+0 1 R
+0 -2 R
+0 -2 R
+0 1 R
+0 -1 R
+0 -2 R
+0 1 R
+0 2 R
+0 -1 R
+0 2 R
+0 1 R
+0 -3 R
+0 3 R
+0 -1 R
+0 -3 R
+0 -1 R
+0 1 R
+0 1 R
+0 3 R
+0 1 V
+0 -1 R
+1 0 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 -2 R
+1 0 V
+-1 -2 R
+1 0 V
+-1 1 R
+1 0 V
+0 3 R
+0 -1 R
+0 1 R
+0 -2 R
+0 -2 R
+0 1 R
+0 3 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+0 2 R
+1 0 V
+-1 -1 R
+1 0 V
+-1 -1 R
+1 0 V
+0 2 R
+0 -1 R
+0 -1 R
+0 2 R
+0 -1 R
+0 -1 R
+0 2 R
+0 -1 R
+0 -1 R
+0 1 R
+0 -1 R
+0 1 R
+0 -1 R
+1 1 R
+0 -1 R
+0 1 R
+0 -1 R
+2976 1871 L
+0 1 V
+0 -1 R
+0 1 R
+0 -1 R
+0 -1 R
+0 2 R
+0 -1 R
+0 -1 R
+0 2 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+0 2373 R
+0 -2373 R
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+3037 1914 L
+1 0 R
+0 1 V
+1 0 V
+2 1 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+0 1583 R
+0 -1583 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 2 R
+1 0 V
+-1 -2 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+0 -1 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 26 R
+0 1178 R
+0 -1596 R
+0 1 V
+0 417 R
+0 1178 R
+0 -1595 R
+0 393 R
+1 0 V
+1 1 V
+0 -93 R
+1 93 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+3096 1956 L
+1 1 V
+0 1057 R
+0 -1057 R
+0 1057 R
+0 1 V
+0 -1058 R
+2 1 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+2 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1039 R
+0 -1039 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+3163 2002 L
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+7 13 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 -1358 R
+0 55 R
+0 -55 R
+0 1358 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+3235 2060 L
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1086 R
+0 -1086 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 7 R
+0 -7 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+3300 2104 L
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+0 819 R
+0 -819 R
+0 819 R
+0 -819 R
+1 0 V
+0 1 V
+0 819 R
+0 -819 R
+1 0 V
+0 1 V
+1 0 V
+-1 819 R
+1 -819 R
+0 1 R
+1 0 V
+1 2 R
+1 1 R
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 1 R
+1 1 R
+1 0 V
+0 -3 R
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+0 2169 R
+0 -2169 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+3358 2145 L
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 -1661 R
+0 1661 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+0 625 R
+0 -437 R
+0 -188 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+0 285 R
+0 -1452 R
+0 1167 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+0 -836 R
+1 836 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+3420 2187 L
+1 0 R
+0 1 V
+0 -245 R
+0 245 R
+1 0 R
+0 1994 R
+0 -1993 R
+1 0 V
+3 65 R
+1 -65 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 2063 R
+0 -2063 R
+0 1 R
+3479 2225 L
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+0 -1188 R
+0 1188 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+0 1374 R
+1 -1374 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 333 R
+0 -333 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 4 R
+1 0 V
+0 1 V
+3544 2273 L
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+3 5 R
+1 0 R
+0 1 R
+1 0 V
+0 -1537 R
+0 1537 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 313 R
+1 -313 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+45 27 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+3657 2349 L
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+-1 990 R
+1 0 V
+0 -990 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+-1 948 R
+1 0 V
+3677 945 M
+1 0 V
+0 1419 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 -1580 R
+0 1580 R
+2 1 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+0 -3 R
+1 0 V
+0 3 R
+1 1 V
+0 1880 R
+0 -1880 R
+0 1880 R
+0 -1880 R
+1 0 V
+0 1 V
+0 -950 R
+0 950 R
+1 0 R
+1 1 V
+0 1881 R
+0 -1881 R
+0 -949 R
+1 949 R
+0 1 V
+0 572 R
+0 -1522 R
+0 1522 R
+0 -572 R
+0 1880 R
+0 1 V
+0 -1881 R
+1 0 V
+-1 572 R
+1 0 V
+0 -572 R
+0 -949 R
+0 2830 R
+0 -1881 R
+3705 2383 L
+0 -950 R
+0 2830 R
+0 -1880 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1888 R
+1 0 V
+0 -1888 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 -1186 R
+0 1187 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+0 -1389 R
+0 1389 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1555 R
+0 -1555 R
+0 1 V
+0 1554 R
+0 -1554 R
+1 0 V
+1 1 R
+0 1554 R
+9 -2897 R
+0 2502 R
+0 -1153 R
+0 -1874 R
+0 2292 R
+0 1 V
+0 734 R
+1 0 V
+0 -1152 R
+0 -1874 R
+1 0 V
+-1 1874 R
+1 0 V
+0 -1919 R
+0 1919 R
+0 1153 R
+0 -2502 R
+0 -570 R
+0 1919 R
+0 -1349 R
+0 1349 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+0 -1342 R
+0 1 V
+0 -578 R
+0 1919 R
+0 -1874 R
+0 -45 R
+0 2338 R
+0 1334 R
+0 -3672 R
+0 2338 R
+0 -419 R
+0 -1341 R
+0 2493 R
+0 1 R
+0 -1153 R
+3760 2420 L
+-1 -1049 R
+1 0 V
+-1 2737 R
+1 0 V
+-1 -1688 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+0 -1296 R
+0 1296 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+2 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+0 -1843 R
+1 0 V
+-1 1843 R
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+0 -48 R
+0 48 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+3822 2465 L
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+0 1689 R
+0 -3644 R
+0 2124 R
+0 1 V
+0 -170 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 656 R
+0 -656 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 R
+0 -1596 R
+0 1596 R
+3884 2507 L
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+1 0 R
+0 1 V
+1 1 R
+1 0 R
+3 2 R
+1 1 R
+1 0 R
+1 1 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 -179 R
+0 180 R
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 R
+3954 2555 L
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 983 R
+0 -983 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 R
+1 -1313 R
+0 1313 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+0 135 R
+1 0 V
+-1 -133 R
+1 0 V
+-1 135 R
+1 0 V
+-1 -136 R
+1 0 V
+1 137 R
+0 -136 R
+0 135 R
+1 0 V
+-1 -135 R
+1 0 V
+0 135 R
+0 -135 R
+0 135 R
+0 -135 R
+0 135 R
+0 -135 R
+0 135 R
+0 -135 R
+2 136 R
+0 -136 R
+0 134 R
+0 -132 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+4007 2592 L
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 -1145 R
+0 1145 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+4070 2637 L
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+5 -1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 -169 R
+0 -1955 R
+0 2124 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+4141 2680 L
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+0 -1149 R
+1 1149 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+0 -756 R
+0 1 V
+0 755 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 -135 R
+0 135 R
+0 -135 R
+0 144 R
+0 174 R
+0 -174 R
+0 1 V
+0 -145 R
+1 0 R
+0 135 R
+0 -134 R
+0 134 R
+0 -133 R
+0 135 R
+0 -136 R
+0 134 R
+0 1 V
+1 0 V
+0 -134 R
+0 136 R
+1 0 V
+-1 -135 R
+4194 2583 L
+-1 -1 R
+1 0 V
+0 136 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 882 R
+0 -882 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 7 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+2 1 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+4255 2769 L
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+0 -626 R
+0 626 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+-1 1630 R
+1 -1630 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+0 613 R
+1 0 V
+1 5 R
+2 -1050 R
+0 990 R
+0 -990 R
+3 436 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 -373 R
+0 373 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+4321 2814 L
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+4 -1805 R
+1 1805 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 -1525 R
+0 1526 R
+0 1 R
+0 1 V
+1 302 R
+0 1 V
+0 -1596 R
+0 418 R
+1 0 R
+0 1178 R
+0 -1596 R
+0 1 V
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+1 0 V
+-1 -1595 R
+1 0 V
+0 1595 R
+0 -1595 R
+0 1595 R
+0 1 V
+0 -1596 R
+0 1 V
+0 1595 R
+0 -1595 R
+1 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1596 R
+0 -1596 R
+0 1596 R
+1 0 V
+-1 -1596 R
+1 1 V
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+1 1596 R
+0 -1596 R
+0 1 V
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1596 R
+1 0 V
+-1 -1596 R
+1 1 V
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+0 1595 R
+0 -1595 R
+2 -239 R
+2 0 R
+0 1527 R
+0 -1527 R
+0 1527 R
+0 -1 R
+0 -1 R
+0 -1526 R
+0 1527 R
+1 -1 R
+4360 2830 L
+0 172 R
+0 -172 R
+0 2 R
+0 -1527 R
+0 1527 R
+0 -1527 R
+0 1527 R
+1 0 V
+-1 -1527 R
+1 0 V
+-1 1525 R
+1 0 V
+1 922 R
+0 -3256 R
+0 2337 R
+0 919 R
+0 -2446 R
+0 1109 R
+0 1337 R
+0 -2446 R
+0 1528 R
+0 918 R
+1 0 V
+-1 -2446 R
+1 0 V
+-1 1528 R
+1 0 V
+-1 -1759 R
+1 0 V
+0 1759 R
+0 -1768 R
+0 1768 R
+0 918 R
+0 -2445 R
+0 -811 R
+0 1 V
+0 2337 R
+0 918 R
+0 -2445 R
+0 -232 R
+0 1 V
+0 1758 R
+0 918 R
+0 -2445 R
+0 1527 R
+0 -2293 R
+0 1 V
+0 3026 R
+0 185 R
+0 -2446 R
+0 1527 R
+0 -2337 R
+0 45 R
+0 2292 R
+1 0 V
+0 919 R
+0 -2446 R
+0 1527 R
+0 1 V
+0 918 R
+0 -2446 R
+0 -810 R
+0 579 R
+0 3310 R
+0 -1551 R
+0 918 R
+0 -918 R
+0 918 R
+0 -2445 R
+0 1527 R
+0 918 R
+0 -2445 R
+0 1527 R
+0 918 R
+0 -2445 R
+0 1527 R
+1 918 R
+0 1 V
+0 -2446 R
+0 1527 R
+0 919 R
+0 -2446 R
+0 1527 R
+0 -2337 R
+0 3256 R
+0 -2446 R
+0 1527 R
+0 919 R
+0 -2446 R
+0 1527 R
+0 1 V
+0 918 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 -2445 R
+0 1527 R
+0 -1527 R
+0 -766 R
+0 -45 R
+0 3257 R
+0 -2446 R
+0 1527 R
+0 -1527 R
+0 2446 R
+1 -2446 R
+4370 1312 L
+0 2445 R
+0 -919 R
+0 -2337 R
+0 570 R
+0 1767 R
+0 1 V
+0 -1527 R
+0 2445 R
+0 -918 R
+0 -1768 R
+0 241 R
+0 2445 R
+0 -2445 R
+1 0 V
+-1 2446 R
+1 0 V
+0 725 R
+0 -1903 R
+0 259 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 -1543 R
+0 453 R
+0 1090 R
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 -659 R
+0 659 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1748 R
+0 -1748 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+4422 2874 L
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 266 R
+0 -266 R
+0 -2190 R
+0 2190 R
+1 0 V
+0 266 R
+0 -266 R
+0 266 R
+1 -2456 R
+0 2190 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+0 73 R
+0 -73 R
+1 0 V
+1 1 V
+1 0 V
+-1 4 R
+1 -4 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 -4 R
+0 4 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+0 -1885 R
+0 1885 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1217 R
+0 -1217 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 -1574 R
+0 1574 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+4479 2912 L
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+0 -1 R
+0 1 R
+0 -1 R
+1 1 R
+1 0 V
+0 1 R
+0 1 R
+0 -1 R
+1 0 V
+-1 1 R
+1 0 V
+0 -1 R
+0 2 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+0 -1 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 -819 R
+0 819 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 -819 R
+0 819 R
+1 1 V
+1 0 V
+0 1 R
+0 -819 R
+0 819 R
+1 0 V
+0 -1210 R
+0 1211 R
+0 -1211 R
+1 0 V
+-1 1211 R
+1 0 V
+0 -1211 R
+0 1211 R
+0 -1210 R
+0 1210 R
+0 -1210 R
+1 0 V
+-1 1210 R
+1 0 V
+0 -1210 R
+0 1211 R
+0 -1211 R
+1 0 V
+-1 1211 R
+1 0 V
+0 -1211 R
+0 1 V
+0 1210 R
+0 -1210 R
+0 1210 R
+0 -1210 R
+0 1211 R
+0 -1211 R
+0 1211 R
+1 -1211 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1211 R
+0 -1211 R
+0 1 V
+0 1210 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+4517 2939 L
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 -1 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+0 -73 R
+0 73 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+0 -576 R
+0 -3 R
+1 0 V
+0 3 R
+1 1 V
+0 1880 R
+0 -1880 R
+0 -950 R
+0 950 R
+1 0 V
+0 -949 R
+0 950 R
+1 0 R
+0 1 V
+1 0 R
+0 1881 R
+0 -1881 R
+1 0 V
+-1 -949 R
+1 0 V
+0 949 R
+0 1 V
+0 572 R
+0 -1522 R
+0 1522 R
+1 0 V
+-1 -572 R
+1 0 V
+-1 1880 R
+0 1 V
+1 -1881 R
+0 572 R
+0 -572 R
+0 -949 R
+0 2830 R
+0 -1881 R
+0 1 V
+0 -950 R
+0 2830 R
+0 -1880 R
+1 0 V
+0 572 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+4562 2963 L
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+0 1206 R
+0 -1206 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 -1894 R
+0 1894 R
+1 0 V
+0 1 R
+1 0 V
+0 350 R
+0 -349 R
+1 420 R
+0 -420 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+4624 3007 L
+1 1 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 1 V
+1 -290 R
+0 -1944 R
+0 1800 R
+2 435 R
+1 1 R
+0 14 R
+0 -14 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+0 -654 R
+0 1246 R
+0 -591 R
+1 0 V
+0 -14 R
+0 14 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 -775 R
+0 775 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+4689 3054 L
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1155 R
+0 1 V
+0 -1156 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+5 -2510 R
+10 2558 R
+4 -543 R
+54 542 R
+1 0 V
+28 -23 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+4 3 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 -18 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+14 -1394 R
+15 2815 R
+4 -3489 R
+1 3109 R
+1 0 V
+0 1 R
+1 0 V
+2 -1028 R
+1 1 R
+1 1033 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+2 1 V
+1 1 V
+1 0 R
+1 1 V
+2 1 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 1 R
+1 0 V
+7 -413 R
+1 425 R
+1 0 V
+0 1 R
+24 419 R
+68 -172 R
+21 -2146 R
+5014 2264 L
+12 2164 R
+15 -3250 R
+11 1261 R
+19 -228 R
+1 0 V
+0 1 R
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 V
+0 -1 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+5097 497 M
+19 34 R
+39 2826 R
+0 -471 R
+1 471 R
+0 -2332 R
+0 2332 R
+0 1 V
+1 0 V
+0 -2333 R
+0 2333 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 -2173 R
+0 2173 R
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+2 1 R
+0 1 R
+1 0 R
+1 1 V
+1 -3 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+5203 3387 L
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 -1447 R
+0 1447 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+5 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+0 -614 R
+0 1 V
+1 0 V
+-1 613 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 -615 R
+0 616 R
+0 -616 R
+0 616 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+3 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+5276 3432 L
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 24 R
+0 -24 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 R
+1 0 R
+2 1 R
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+5342 3479 L
+0 752 R
+0 -752 R
+1 0 V
+-1 752 R
+1 1 V
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+1 0 V
+-1 -753 R
+1 1 V
+0 753 R
+1 0 V
+-1 -753 R
+1 0 V
+0 753 R
+2 2 V
+-2 -755 R
+0 1 V
+2 1 V
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+1 0 V
+-1 753 R
+1 0 V
+0 -753 R
+0 753 R
+0 -752 R
+0 752 R
+0 1 V
+0 -753 R
+1 0 V
+-1 753 R
+1 0 V
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+0 -752 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 -2288 R
+0 -294 R
+0 2185 R
+0 -1480 R
+0 1275 R
+0 1 V
+0 -2052 R
+0 2053 R
+0 -2053 R
+0 71 R
+1 0 V
+0 -70 R
+0 2050 R
+0 1 V
+0 601 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 687 R
+0 -687 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+5376 3502 L
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+5426 882 M
+1 0 V
+0 2655 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+2 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 R
+5446 3550 L
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+0 -1153 R
+0 1153 R
+0 -3027 R
+0 1875 R
+0 -1350 R
+0 2502 R
+0 -734 R
+1 0 V
+-1 918 R
+1 0 V
+-1 -2445 R
+1 0 V
+-1 -232 R
+1 1 V
+-1 -580 R
+1 1 V
+0 3071 R
+0 -3071 R
+0 569 R
+0 1 V
+0 2501 R
+0 1 V
+0 -735 R
+0 1 V
+0 918 R
+0 -2446 R
+0 1109 R
+1 0 V
+0 1 V
+0 1152 R
+1 -733 R
+0 -1527 R
+0 2261 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+0 -734 R
+0 -2337 R
+0 577 R
+0 1 V
+0 2493 R
+1 0 V
+0 -2493 R
+0 1341 R
+0 1152 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+0 -2429 R
+0 2429 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+5490 3580 L
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+0 82 R
+1 -82 R
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 4 R
+0 -2800 R
+0 4 R
+0 2792 R
+0 4 R
+0 -2800 R
+0 4 R
+0 -8 R
+0 2800 R
+0 1 V
+0 3 R
+0 -2800 R
+0 4 R
+0 -7 R
+0 2800 R
+0 3 R
+0 -2800 R
+0 4 R
+0 -7 R
+0 3 R
+0 4 R
+0 2793 R
+1 0 V
+5519 804 M
+0 4 R
+0 1 V
+0 -8 R
+0 2803 R
+0 -2803 R
+1 0 V
+-1 3 R
+1 0 V
+-1 5 R
+1 0 V
+-1 2795 R
+1 0 V
+0 -2803 R
+0 3 R
+0 1 V
+0 4 R
+0 2792 R
+0 -2800 R
+0 4 R
+0 4 R
+0 2795 R
+0 1 V
+0 -2800 R
+0 4 R
+0 -8 R
+0 2804 R
+0 -2800 R
+0 4 R
+0 -8 R
+0 2800 R
+0 -2796 R
+0 4 R
+0 2796 R
+0 -2804 R
+0 2800 R
+0 -2796 R
+0 4 R
+0 2796 R
+0 -2800 R
+5521 805 L
+-1 4 R
+1 0 V
+0 -7 R
+0 2800 R
+0 3 R
+0 -2800 R
+0 1 R
+0 2799 R
+0 -2799 R
+0 -4 R
+0 2800 R
+0 -2797 R
+0 1 V
+0 2796 R
+0 -2800 R
+0 4 R
+0 2796 R
+0 -2800 R
+0 3 R
+0 1 R
+0 2796 R
+0 -2800 R
+0 2800 R
+0 -2792 R
+0 -4 R
+0 2796 R
+0 -2800 R
+0 3 R
+0 1 R
+0 2796 R
+0 -2800 R
+0 2800 R
+0 -2792 R
+0 -4 R
+0 2796 R
+0 -2800 R
+0 3 R
+0 1 R
+0 2796 R
+0 -2800 R
+0 2800 R
+0 -2792 R
+0 -4 R
+0 2796 R
+0 -2800 R
+0 3 R
+0 1 R
+0 2796 R
+0 -2800 R
+0 2800 R
+0 -2792 R
+0 -4 R
+0 2796 R
+0 -2800 R
+0 3 R
+0 1 R
+0 2796 R
+0 -2800 R
+0 2800 R
+0 -2792 R
+1 2795 R
+0 -2795 R
+0 -4 R
+0 1 V
+1 0 V
+0 2796 R
+0 3 R
+0 1 V
+0 -2804 R
+0 5 R
+0 2799 R
+0 -4 R
+0 -2800 R
+0 1 V
+0 4 R
+0 2869 R
+0 -70 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 -882 R
+0 882 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 108 R
+1 -104 R
+0 1 R
+1 0 R
+5542 3623 L
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 -151 R
+0 151 R
+1 1 V
+1 0 V
+0 36 R
+0 -36 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 -2857 R
+0 2857 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+0 -865 R
+0 865 R
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+0 -36 R
+0 36 R
+1 0 V
+0 1 V
+1 0 V
+0 -3169 R
+0 3169 R
+5600 3664 L
+1 0 V
+-1 -82 R
+1 0 V
+0 -3088 R
+0 3170 R
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 -773 R
+2 775 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+0 -1 R
+0 1 V
+1 0 V
+-1 -1 R
+1 1 R
+1 1 V
+1 0 V
+0 1 V
+0 -2180 R
+0 2180 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 -1305 R
+0 1305 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+5663 3707 L
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 -1031 R
+0 1031 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+3 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 -3254 R
+0 3254 R
+0 1 V
+0 -919 R
+0 -1527 R
+1 -335 R
+0 -474 R
+0 2337 R
+0 -1527 R
+0 2446 R
+0 -1 R
+0 -2445 R
+0 2445 R
+0 -2445 R
+1 2445 R
+0 -2445 R
+1 -810 R
+0 2337 R
+0 919 R
+0 -2446 R
+0 1109 R
+0 1337 R
+0 -2446 R
+0 1528 R
+0 918 R
+0 -2446 R
+0 1528 R
+0 -1759 R
+0 1759 R
+0 -1768 R
+0 1768 R
+0 918 R
+0 -2445 R
+0 -811 R
+5730 497 L
+0 2337 R
+0 918 R
+0 -2445 R
+0 -232 R
+0 1 V
+0 1758 R
+0 918 R
+0 -2445 R
+0 1527 R
+0 -2293 R
+1 1 V
+-1 3026 R
+1 185 R
+0 -2446 R
+0 1527 R
+0 -2337 R
+0 45 R
+0 2292 R
+0 919 R
+0 -2446 R
+0 1527 R
+0 1 V
+0 918 R
+0 -2446 R
+0 -810 R
+0 579 R
+0 3310 R
+0 -1551 R
+0 918 R
+0 -918 R
+1 918 R
+0 -2445 R
+0 1527 R
+0 918 R
+0 -2445 R
+0 1527 R
+0 918 R
+0 -2445 R
+0 1527 R
+0 918 R
+0 1 V
+0 -2446 R
+0 1527 R
+0 919 R
+0 -2446 R
+0 1527 R
+0 -2337 R
+0 3256 R
+0 -2446 R
+0 1527 R
+0 919 R
+1 0 V
+-1 -2446 R
+1 0 V
+-1 1527 R
+1 1 V
+0 -2338 R
+0 810 R
+0 1528 R
+1 0 R
+0 1 R
+0 -2293 R
+1 2293 R
+0 1 V
+1 0 V
+0 -2293 R
+0 2293 R
+1 0 R
+0 -2337 R
+0 45 R
+0 765 R
+0 2446 R
+0 -2446 R
+0 1 V
+0 2445 R
+0 -919 R
+0 1 V
+0 1334 R
+0 -3672 R
+0 2338 R
+0 1334 R
+0 -415 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+5753 3768 L
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+0 -2278 R
+0 2278 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+5821 3815 L
+0 1 V
+0 -3070 R
+0 3070 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+2 0 R
+0 1 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 -3260 R
+0 3260 R
+0 -3260 R
+0 3260 R
+1 5 R
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+5885 3864 L
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+2 1 V
+1 1 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+2 1 R
+0 -1019 R
+0 -1392 R
+1 2412 R
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+2 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+1 1 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+0 -1 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+5961 3917 L
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 -2132 R
+0 2132 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 555 R
+0 -555 R
+0 1 V
+1 0 V
+0 2 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 -451 R
+0 451 R
+1 0 R
+0 1 V
+0 -1554 R
+1 0 V
+0 1554 R
+6025 3964 L
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+0 5 R
+0 -5 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 -5 R
+0 5 R
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+6092 4010 L
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 8 R
+0 -8 R
+0 8 R
+0 -8 R
+0 9 R
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+2 2 R
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+0 -1089 R
+0 1089 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+-1 482 R
+1 -481 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+0 -2284 R
+0 2284 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+6154 4063 L
+1 0 V
+6154 798 M
+1 0 V
+0 3265 R
+1 1 V
+0 -880 R
+0 880 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+4 383 R
+8 -1431 R
+6 1109 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 1 R
+0 1 V
+1 0 R
+1 1 V
+1 1 V
+2 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+2 2 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 -3028 R
+0 3028 R
+6238 4169 L
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 -1334 R
+0 -2338 R
+0 2338 R
+0 1334 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 -687 R
+0 687 R
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 0 R
+0 1 R
+1 3 R
+1 0 V
+0 1 V
+2 0 R
+18 15 R
+1 -1 R
+0 272 R
+0 -3943 R
+0 3943 R
+0 -3943 R
+0 3672 R
+0 271 R
+1 -272 R
+0 272 R
+0 -3943 R
+0 3671 R
+0 272 R
+0 -272 R
+0 1 V
+0 271 R
+0 -271 R
+0 271 R
+1 -271 R
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+6312 4223 L
+0 1 R
+1 0 R
+0 -1148 R
+0 1148 R
+0 -2627 R
+0 2627 R
+1 1 R
+0 1 R
+1 0 V
+1 0 V
+1 3 R
+1 0 V
+0 1 V
+1 0 R
+2 -752 R
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+0 1 V
+0 752 R
+0 -752 R
+1 0 V
+-1 752 R
+1 1 V
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+1 0 V
+-1 -753 R
+1 1 V
+0 753 R
+1 0 V
+-1 -753 R
+1 0 V
+0 753 R
+2 2 V
+-2 -755 R
+0 1 V
+2 1 V
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+0 -753 R
+1 0 V
+-1 753 R
+1 0 V
+0 -753 R
+0 753 R
+0 -752 R
+0 752 R
+0 1 V
+0 -753 R
+1 0 V
+-1 753 R
+1 0 V
+0 -753 R
+0 753 R
+0 -753 R
+0 753 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+0 -3593 R
+1 0 V
+0 3593 R
+0 1 R
+0 -2577 R
+0 2577 R
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 -2796 R
+0 2796 R
+6350 4251 L
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+0 -2830 R
+0 950 R
+1 1880 R
+0 1 V
+1 0 V
+0 -1880 R
+0 -950 R
+0 2830 R
+1 1 V
+0 -2830 R
+0 2830 R
+1 1 V
+1 0 V
+0 -1880 R
+0 -950 R
+0 2830 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+0 -2831 R
+0 2831 R
+0 -2830 R
+0 949 R
+0 1881 R
+1 0 V
+-1 -2830 R
+1 0 V
+0 2830 R
+0 1 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+0 -2063 R
+0 2063 R
+1 281 R
+1 -280 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+6405 4291 L
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+0 -3350 R
+0 3350 R
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+-1 -2169 R
+1 2169 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+0 1 R
+0 -1 R
+1 0 R
+0 1 V
+1 0 R
+0 -1 R
+0 1 R
+0 -1 R
+2 2 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 R
+6467 4333 L
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+1 1 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+0 -1410 R
+0 1410 R
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+6533 4379 L
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 -623 R
+0 623 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+-1 -2936 R
+1 0 V
+0 2936 R
+0 1 V
+1 0 V
+1 0 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+0 -1631 R
+1 1631 R
+1 0 V
+0 1 R
+1 0 V
+0 1 R
+1 0 R
+1 0 V
+0 1 V
+6596 4423 L
+0 1 V
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 V
+1 -3404 R
+0 3109 R
+1 0 R
+2 296 R
+1 0 R
+1 0 V
+1 0 V
+0 2 R
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+2 1 R
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+2 2 R
+0 1 V
+1 0 R
+0 6 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+2 1 R
+1 1 V
+1 0 V
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+0 -3398 R
+0 3398 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+0 -33 R
+0 33 R
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+6664 4475 L
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+0 -272 R
+1 0 V
+-1 272 R
+1 0 V
+0 1 R
+6 -2448 R
+20 901 R
+5 -631 R
+0 1 V
+5 316 R
+3 -219 R
+0 379 R
+0 -626 R
+14 1219 R
+7 -1667 R
+1 1 V
+1 2594 R
+5 -3565 R
+85 1092 R
+29 2714 R
+1 0 R
+0 1 R
+1 0 R
+1 1 R
+1 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 R
+1 0 R
+0 1 V
+1 0 R
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 3 R
+0 -3 R
+0 3 R
+1 -3 R
+0 1 V
+0 2 R
+0 1 V
+0 -3 R
+0 3 R
+0 -3 R
+0 3 R
+0 -3 R
+1 0 V
+0 3 R
+0 -3 R
+0 3 R
+0 -2 R
+1 0 V
+0 3 R
+0 -3 R
+0 3 R
+0 -3 R
+1 1 V
+0 -3 R
+0 3 R
+0 -3 R
+1 0 V
+-1 3 R
+1 0 V
+0 -3 R
+0 3 R
+0 -3 R
+0 3 R
+0 -3 R
+0 1 V
+0 2 R
+0 1 V
+0 -3 R
+0 3 R
+0 -3 R
+6882 4559 L
+-1 3 R
+1 0 R
+0 -3 R
+0 3 R
+0 -3 R
+0 3 R
+0 -3 R
+0 3 R
+0 1 V
+1 0 R
+0 -3 R
+0 3 R
+0 -3 R
+0 3 R
+1 0 R
+0 1 V
+0 -4 R
+0 4 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 R
+1 0 R
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+0 -3879 R
+1 3879 R
+0 1 V
+1 0 V
+0 1 R
+1 0 R
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 V
+0 1 R
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+0 -2417 R
+0 595 R
+1 1825 R
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 R
+0 1 R
+1 0 V
+0 1 R
+1 0 V
+1 0 V
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+6939 4604 L
+0 1 V
+1 0 V
+1 1 R
+1 -3993 R
+1 3993 R
+0 1 V
+1 0 V
+0 1 V
+1 0 V
+1 0 V
+0 1 R
+4 0 R
+0 1 V
+1 0 R
+0 1 V
+1 0 V
+1 1 V
+1 0 R
+0 1 V
+1 0 V
+0 6 R
+0 -6 R
+0 1 V
+0 3 R
+0 2 R
+0 -5 R
+1 0 V
+0 5 R
+0 1 V
+0 -6 R
+0 6 R
+0 -4137 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+0 4131 R
+1 1 R
+1 0 R
+0 1 V
+0 2 R
+0 -2 R
+1 0 R
+1 1 V
+-1 2 R
+1 0 V
+-1 -6 R
+1 0 V
+0 4 R
+1 0 V
+0 1 V
+stroke
+LT0
+1.00 0.00 0.00 C /Helvetica findfont 140 scalefont setfont
+6941 3743 M
+-39 84 R
+-12 657 R
+-28 -817 R
+-12 824 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 -488 R
+0 488 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 3 R
+0 -2 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+6823 4510 L
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 -482 R
+1 0 V
+-1 482 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+6793 4531 L
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 -522 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+0 8 R
+0 -8 R
+0 8 R
+0 -8 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 45 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+6765 4074 L
+-1 0 R
+-1 1 R
+1 0 V
+-2 0 R
+0 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-3 5 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 1 R
+-1 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-6 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+6720 4104 L
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1688 R
+0 1688 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+1 0 V
+-2 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+0 -2436 R
+0 2436 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+0 -3427 R
+0 3427 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+6691 4125 L
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-2 -59 R
+0 1 R
+0 -1 V
+0 1 R
+0 58 R
+-2 -59 R
+0 1 R
+0 58 R
+-2 -59 R
+-29 -796 R
+6638 1343 M
+-2 695 R
+-2 1 R
+-1 1 R
+-15 1783 R
+-29 -930 R
+-6 3 R
+6553 1296 M
+-11 1518 R
+-17 -512 R
+-40 1765 R
+6464 2871 M
+-10 1592 R
+-7 -3294 R
+-19 2846 R
+-3 -2877 R
+-1 2957 R
+-6 -1190 R
+6411 522 M
+0 2125 R
+0 -1 V
+0 -169 R
+-10 950 R
+0 -703 R
+-82 1 R
+0 173 R
+6313 595 M
+-20 3602 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+6283 4204 L
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 0 R
+6243 1015 M
+-13 555 R
+-3 2 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+2 -1 V
+-3 2 R
+1 -1 V
+-2 2 R
+1 -1 V
+-2 1 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 2 R
+3 -2 V
+-6 3 R
+6196 1594 L
+-1 2944 R
+-1 -2943 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+1 -1 V
+-1 1481 R
+1 -1 V
+-16 758 R
+-5 203 R
+0 -1 V
+-2 2 R
+-2 -2755 R
+-4 3273 R
+-7 -943 R
+6079 2373 M
+-42 538 R
+1 0 V
+6025 496 M
+0 570 R
+0 2502 R
+0 -1153 R
+0 1337 R
+0 -2446 R
+-1 1528 R
+1 -1 V
+-1 -1758 R
+0 2493 R
+0 -2502 R
+0 1 R
+0 -1 V
+-1 2502 R
+1 0 V
+-1 -2501 R
+1 0 V
+-1 1767 R
+0 919 R
+0 -2446 R
+0 -240 R
+0 2502 R
+0 -735 R
+0 -1767 R
+0 -570 R
+0 3072 R
+0 -3072 R
+0 3256 R
+0 -918 R
+0 -1528 R
+0 -765 R
+0 534 R
+-1 -9 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 2501 R
+0 -734 R
+0 -2337 R
+-1 570 R
+1 0 V
+-1 0 R
+0 -525 R
+0 2237 R
+0 94 R
+-26 -427 R
+-28 -242 R
+-19 784 R
+-44 629 R
+-7 -1323 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 1566 R
+-14 625 R
+5847 680 M
+-15 3756 R
+5821 2751 M
+-12 1803 R
+5783 1711 M
+-24 2330 R
+5737 2441 M
+0 724 R
+0 -2150 R
+0 1389 R
+0 -1389 R
+-7 2948 R
+0 -1554 R
+-15 2197 R
+5716 4606 L
+5651 2055 M
+-1 455 R
+5622 1460 M
+-2 1519 R
+1 0 V
+-1 0 R
+-6 101 R
+-69 186 R
+-6 -2203 R
+0 -283 R
+-2 2836 R
+5479 2404 M
+-5 3 R
+0 477 R
+0 -476 R
+0 -1 V
+-1 1555 R
+-1 1 R
+1 0 V
+-1 -1555 R
+0 1 R
+-38 908 R
+-16 178 R
+-8 -318 R
+-32 250 R
+-10 103 R
+-10 -41 R
+-3 -1849 R
+0 1 R
+0 2623 R
+0 -3358 R
+0 734 R
+0 -734 R
+0 734 R
+0 2625 R
+0 -3359 R
+0 734 R
+-1 1 R
+0 -1 V
+0 2624 R
+0 -664 R
+0 -2694 R
+0 735 R
+0 2848 R
+0 -1371 R
+0 239 R
+0 908 R
+5246 1755 M
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+0 338 R
+0 -338 R
+-1 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-6 1 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 1 R
+-8 1987 R
+-3 -324 R
+0 -703 R
+5159 1355 M
+0 -1 V
+-4 40 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-2 1 R
+0 3 R
+0 -3 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-3 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+5144 1401 L
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-2 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 1666 R
+0 -1666 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+5115 1421 L
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 2830 R
+0 -2830 R
+-1 1 R
+0 2830 R
+0 -2830 R
+-1 2831 R
+0 -2830 R
+-1 0 R
+-1 2831 R
+0 -2830 R
+0 950 R
+0 -950 R
+0 2830 R
+-1 -2830 R
+0 2831 R
+0 -2831 R
+0 2831 R
+0 -2831 R
+0 1 R
+0 949 R
+0 -949 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 950 R
+0 -950 R
+-1 1 R
+1 -1 V
+-1 2831 R
+1 0 V
+-1 -1881 R
+0 -949 R
+0 2830 R
+0 -2830 R
+0 2830 R
+0 -2830 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+5088 1439 L
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 2261 R
+0 -2261 R
+0 1 R
+-1 0 R
+0 1966 R
+0 -2721 R
+-1 755 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+5051 1464 L
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 196 R
+0 -196 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+1 -1 V
+-2 1 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+1 0 V
+-1 0 R
+-1 6 R
+0 -6 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+5025 1482 L
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+4998 1501 L
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+4967 1523 L
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+0 -365 R
+0 366 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1149 R
+0 -1 V
+0 -1147 R
+-1 2483 R
+-1 -2478 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+4934 1550 L
+0 1 R
+-1 -719 R
+1 0 V
+-1 719 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+1 0 V
+-2 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+0 1620 R
+0 -1620 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-5 2 R
+-1 0 R
+1 0 V
+-5 1881 R
+-4 -1868 R
+-1 0 R
+-1 1 R
+-1 1 R
+-1 0 R
+4889 1587 L
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+1 0 V
+-2 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-3 2 R
+1 -1 V
+-1 2 R
+1 -1 V
+-3 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+-1 1 R
+0 2 R
+3 -2 V
+-6 3 R
+1 -1 V
+-1 2944 R
+-1 -2943 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1481 R
+0 -1 V
+4862 1508 M
+-15 1598 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-2 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 2 R
+3 -2 V
+-5 2 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-5 -1454 R
+-1 1 R
+1 -1 V
+-1 1 R
+-2 1106 R
+4794 1108 M
+-4 514 R
+-47 -8 R
+-6 6 R
+-3 2 R
+-6 1489 R
+-4 -31 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+4708 1823 M
+-45 1340 R
+-8 1298 R
+0 -1606 R
+0 -1392 R
+0 -683 R
+0 1944 R
+0 -144 R
+0 144 R
+0 1759 R
+0 -3702 R
+0 -1 V
+0 2118 R
+-3 1273 R
+-4 -1489 R
+-14 -307 R
+4624 1245 M
+-20 -90 R
+-2 2750 R
+-9 -2143 R
+-10 1916 R
+0 -1 R
+4507 1617 M
+-30 481 R
+-8 2201 R
+4449 2783 M
+4443 906 M
+-1 3359 R
+-9 -512 R
+0 -918 R
+0 -1528 R
+0 -765 R
+-5 3416 R
+-1 -2285 R
+4428 1673 L
+-1 0 R
+-1 0 R
+-3 2095 R
+1 0 V
+-6 563 R
+4408 1798 M
+-16 2808 R
+0 -1 V
+-22 -647 R
+-7 5 R
+-1 -1554 R
+-1 1879 R
+4354 697 M
+1 0 V
+-6 1143 R
+1 0 V
+4348 698 M
+1 -1 V
+-42 2610 R
+-2 1270 R
+-7 -2497 R
+-6 -323 R
+-1 5 R
+-1 0 R
+1 0 V
+-15 2346 R
+0 -2737 R
+0 2587 R
+4234 1810 M
+-1 0 R
+1 0 V
+-1 0 R
+-12 88 R
+1 0 V
+-2 -92 R
+-5 -344 R
+-9 -566 R
+0 317 R
+-38 3247 R
+-7 -1712 R
+-12 679 R
+-8 -1110 R
+0 -96 R
+-2 20 R
+1 0 V
+-25 375 R
+4091 945 M
+0 3350 R
+4083 740 M
+-27 2384 R
+-6 985 R
+4040 2662 M
+0 1 R
+-13 673 R
+4015 1666 M
+-4 2259 R
+0 -1 V
+3943 720 M
+-25 3781 R
+3914 893 M
+-27 1500 R
+1 0 V
+3858 544 M
+0 2293 R
+3840 945 M
+0 3350 R
+3830 1362 M
+-26 1143 R
+-44 -101 R
+-1 0 R
+0 -1389 R
+0 1389 R
+-2 1 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1555 R
+0 -1554 R
+0 -1 V
+0 1555 R
+0 -1554 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 1554 R
+0 -1554 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+3750 2410 L
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 -1349 R
+0 2502 R
+0 -1153 R
+0 -1874 R
+0 2293 R
+0 -1 V
+-1 735 R
+0 -1152 R
+3741 542 M
+1 0 V
+-1 1874 R
+1 0 V
+3741 497 M
+0 1919 R
+0 1153 R
+0 -2502 R
+0 -570 R
+0 1919 R
+0 -1349 R
+0 1350 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+-2 2 R
+0 -1919 R
+0 1919 R
+0 -1341 R
+0 -1 V
+0 -577 R
+0 1919 R
+0 -1874 R
+0 -45 R
+0 1919 R
+0 -1919 R
+0 2338 R
+0 1334 R
+0 -3672 R
+0 2338 R
+0 -419 R
+0 -1341 R
+-1 1342 R
+-5 2069 R
+-8 -1110 R
+-2 -870 R
+3693 781 M
+3679 502 M
+1 0 V
+-8 961 R
+-13 2948 R
+-7 -2091 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+3644 2325 L
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+0 1012 R
+0 -1012 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 145 R
+-1 -145 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-12 336 R
+-5 -293 R
+3598 2387 L
+-3 1722 R
+-13 485 R
+-3 -727 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+3547 2275 M
+-28 -772 R
+-45 1177 R
+3464 1448 M
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-4 2468 R
+0 -1 V
+3431 496 M
+0 2228 R
+-1 1150 R
+-15 -649 R
+-3 -386 R
+0 1334 R
+0 -3672 R
+0 2338 R
+-35 1269 R
+-44 -747 R
+-5 -1742 R
+-16 2915 R
+3246 788 M
+1 0 V
+-15 -42 R
+-49 3613 R
+-12 -184 R
+-3 83 R
+3156 1285 M
+-37 2065 R
+3080 1561 M
+-14 2759 R
+3062 817 M
+-129 864 R
+-3 1453 R
+0 -1595 R
+-1 1595 R
+1 0 V
+-1 -1595 R
+1 0 V
+-1 1596 R
+0 -1596 R
+-14 2313 R
+-6 -949 R
+2896 834 M
+-1 2053 R
+0 -1982 R
+-10 1848 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-5 -9 R
+-5 507 R
+2865 784 M
+-18 1326 R
+-13 604 R
+2833 650 M
+-9 3591 R
+-6 -2377 R
+1 0 V
+-1 1541 R
+-1 1 R
+-1 5 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 -5 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+0 -613 R
+0 613 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-2 -615 R
+-1 0 R
+1 0 V
+-1 617 R
+-1 0 R
+-17 -451 R
+-18 700 R
+0 -36 R
+2759 1617 M
+-69 1524 R
+0 1 R
+2690 3141 L
+-1 1 R
+0 -266 R
+0 266 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 -656 R
+1 0 V
+-1 656 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 -1086 R
+0 1086 R
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+2662 3161 L
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+2634 3180 L
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 880 R
+0 -1 V
+-1 -878 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 -1620 R
+0 1621 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+1 0 V
+-2 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+2605 3200 L
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-2 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 -2348 R
+0 2349 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+0 -2516 R
+0 2455 R
+0 64 R
+-1 -1552 R
+0 1550 R
+2572 3222 L
+-1 1 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+0 5 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+2546 3246 L
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 -8 R
+0 8 R
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+2514 3268 L
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+0 -2256 R
+0 156 R
+-1 2100 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 -2065 R
+0 615 R
+0 1450 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+2486 3287 L
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-2 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+2453 3310 L
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+-1 0 R
+-1 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+2421 3332 L
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+-1 1 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+1 -1 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 83 R
+0 -1662 R
+0 1579 R
+0 1 R
+0 -1 V
+-1 1 R
+1 0 V
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+1 0 V
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+0 1 R
+0 -1 V
+-1 1 R
+-1 -989 R
+0 990 R
+-10 275 R
+1 0 V
+-1 -387 R
+-23 -835 R
+-14 66 R
+-8 254 R
+-16 -696 R
+-20 1928 R
+-23 578 R
+-4 -1033 R
+-6 892 R
+2256 1997 M
+-15 -869 R
+-1 1 R
+0 -1 V
+-2 1306 R
+1 0 V
+-25 993 R
+2215 3427 L
+-1 -703 R
+1 0 V
+-1 1150 R
+-18 -866 R
+-38 -264 R
+0 -1 V
+-4 -1527 R
+0 -1 V
+-3 1512 R
+2131 1063 M
+0 47 R
+0 2317 R
+0 -847 R
+0 -1800 R
+0 221 R
+0 -220 R
+0 -1 V
+0 2118 R
+0 1586 R
+0 -1586 R
+0 -173 R
+-21 -213 R
+-30 1878 R
+-38 -86 R
+0 -3762 R
+-50 1861 R
+1976 902 M
+-4 1953 R
+-1 1695 R
+1958 641 M
+-1 1763 R
+-1 0 R
+-3 1107 R
+0 451 R
+-1 -1554 R
+0 1555 R
+-6 -5 R
+-1 0 R
+-1 1 R
+1 0 V
+-1 0 R
+-1 1 R
+-1 0 R
+1 0 V
+-1 0 R
+0 1 R
+0 -1 V
+0 1 R
+-1 0 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 -450 R
+0 451 R
+-1 0 R
+1 0 V
+-1 1 R
+0 -1 V
+0 1 R
+-1 -1554 R
+0 1554 R
+0 -2838 R
+-4 2496 R
+0 -1336 R
+1900 519 M
+0 -1 V
+-36 2321 R
+0 1334 R
+0 -3672 R
+0 2338 R
+0 1334 R
+1762 2477 M
+0 -1955 R
+0 2125 R
+0 -1 V
+-4 1397 R
+-24 74 R
+-1 -711 R
+1711 1463 M
+0 2411 R
+0 -3168 R
+0 2192 R
+0 -1435 R
+0 1964 R
+0 324 R
+1696 906 M
+0 734 R
+0 2625 R
+0 -3359 R
+-1 2715 R
+0 -2531 R
+-3 2 R
+-9 1635 R
+-13 180 R
+1636 1143 M
+-26 2344 R
+0 -2288 R
+0 2764 R
+1609 780 M
+0 -74 R
+0 295 R
+-9 2440 R
+1585 1463 M
+-1 -682 R
+-17 1151 R
+-30 135 R
+1489 746 M
+1489 745 L
+0 1 R
+-13 -48 R
+1 0 V
+-1 1 R
+1 0 V
+-1 1358 R
+0 -1303 R
+0 1 R
+-18 2273 R
+-12 -784 R
+-12 617 R
+1409 746 M
+0 -1 V
+0 1 R
+-2 1210 R
+0 1182 R
+0 -1595 R
+-18 897 R
+-2 1394 R
+-23 707 R
+1330 3475 M
+-35 128 R
+0 -2799 R
+0 4 R
+0 2799 R
+1274 1597 M
+0 1479 R
+0 1148 R
+1253 3192 M
+-28 966 R
+1206 834 M
+1 0 V
+-1 2052 R
+0 -1 V
+-1 -481 R
+-1 0 R
+-5 -1266 R
+0 2373 R
+-1 -1103 R
+-1 1555 R
+1183 1420 M
+-18 -381 R
+-1 590 R
+0 -1 V
+-25 776 R
+1 0 V
+-1 0 R
+-5 4 R
+0 -1 V
+0 1555 R
+-1 -2529 R
+1 0 V
+-1 975 R
+0 1555 R
+1122 935 M
+stroke
+1.000 UL
+LTb
+1113 4620 N
+0 -4137 V
+5849 0 V
+0 4137 V
+-5849 0 V
+Z stroke
+1.000 UP
+1.000 UL
+LTb
+stroke
+grestore
+end
+showpage
+%%Trailer
+%%DocumentFonts: Helvetica
+%%Pages: 1
--- /dev/null
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<svg height="600" style="stroke: black; stroke-width: 1; font: Helvetica" width="800" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <rect height="600" style="fill: white; stroke: white" width="800" />
+ <g id="chr_layout">
+ <g id="chr1">
+ <rect fill="" height="1" stroke="" width="11" x="37" y="174" />
+ <rect fill="" height="1" stroke="" width="11" x="37" y="64" />
+ <clipPath id="chr1_clipPath">
+ <rect height="150.819" rx="10" ry="10" width="11.538" x="30.769" y="0.000" />
+ <rect height="149.181" rx="10" ry="10" width="11.538" x="30.769" y="150.819" />
+ </clipPath>
+ <g clip-path="url(#chr1_clipPath)">
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="30.769" y="0.000" />
+ <rect fill="LightGrey" height="3.640" stroke-width="0" width="11.538" x="30.769" y="2.791" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="30.769" y="6.431" />
+ <rect fill="LightGrey" height="2.548" stroke-width="0" width="11.538" x="30.769" y="8.615" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="30.769" y="11.163" />
+ <rect fill="grey" height="4.247" stroke-width="0" width="11.538" x="30.769" y="15.288" />
+ <rect fill="white" height="5.096" stroke-width="0" width="11.538" x="30.769" y="19.535" />
+ <rect fill="LightGrey" height="4.247" stroke-width="0" width="11.538" x="30.769" y="24.631" />
+ <rect fill="white" height="4.853" stroke-width="0" width="11.538" x="30.769" y="28.878" />
+ <rect fill="LightGrey" height="2.669" stroke-width="0" width="11.538" x="30.769" y="33.731" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="30.769" y="36.400" />
+ <rect fill="LightGrey" height="2.669" stroke-width="0" width="11.538" x="30.769" y="39.070" />
+ <rect fill="white" height="6.309" stroke-width="0" width="11.538" x="30.769" y="41.739" />
+ <rect fill="LightGrey" height="5.217" stroke-width="0" width="11.538" x="30.769" y="48.049" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="30.769" y="53.266" />
+ <rect fill="rgb(169,169,169)" height="5.824" stroke-width="0" width="11.538" x="30.769" y="56.421" />
+ <rect fill="white" height="5.945" stroke-width="0" width="11.538" x="30.769" y="62.245" />
+ <rect fill="grey" height="3.033" stroke-width="0" width="11.538" x="30.769" y="68.190" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="30.769" y="71.224" />
+ <rect fill="grey" height="9.464" stroke-width="0" width="11.538" x="30.769" y="73.893" />
+ <rect fill="white" height="0.971" stroke-width="0" width="11.538" x="30.769" y="83.357" />
+ <rect fill="black" height="18.443" stroke-width="0" width="11.538" x="30.769" y="84.328" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="30.769" y="102.771" />
+ <rect fill="rgb(169,169,169)" height="4.732" stroke-width="0" width="11.538" x="30.769" y="106.896" />
+ <rect fill="white" height="3.033" stroke-width="0" width="11.538" x="30.769" y="111.628" />
+ <rect fill="rgb(169,169,169)" height="5.945" stroke-width="0" width="11.538" x="30.769" y="114.661" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="30.769" y="120.607" />
+ <rect fill="black" height="6.067" stroke-width="0" width="11.538" x="30.769" y="123.762" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="30.769" y="129.828" />
+ <rect fill="grey" height="5.217" stroke-width="0" width="11.538" x="30.769" y="135.410" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="30.769" y="140.627" />
+ <rect fill="grey" height="3.761" stroke-width="0" width="11.538" x="30.769" y="142.690" />
+ <rect fill="white" height="0.485" stroke-width="0" width="11.538" x="30.769" y="146.451" />
+ <rect fill="DarkGrey" height="3.883" stroke-width="0" width="11.538" x="30.769" y="146.936" />
+ <rect fill="DarkGrey" height="4.489" stroke-width="0" width="11.538" x="30.769" y="150.819" />
+ <rect fill="LightGrey" height="17.472" stroke-width="0" width="11.538" x="30.769" y="155.309" />
+ <rect fill="white" height="6.795" stroke-width="0" width="11.538" x="30.769" y="172.781" />
+ <rect fill="grey" height="1.941" stroke-width="0" width="11.538" x="30.769" y="179.576" />
+ <rect fill="white" height="4.489" stroke-width="0" width="11.538" x="30.769" y="181.517" />
+ <rect fill="grey" height="1.820" stroke-width="0" width="11.538" x="30.769" y="186.006" />
+ <rect fill="white" height="3.033" stroke-width="0" width="11.538" x="30.769" y="187.826" />
+ <rect fill="grey" height="1.820" stroke-width="0" width="11.538" x="30.769" y="190.860" />
+ <rect fill="white" height="6.067" stroke-width="0" width="11.538" x="30.769" y="192.680" />
+ <rect fill="grey" height="2.063" stroke-width="0" width="11.538" x="30.769" y="198.746" />
+ <rect fill="white" height="4.368" stroke-width="0" width="11.538" x="30.769" y="200.809" />
+ <rect fill="rgb(169,169,169)" height="2.548" stroke-width="0" width="11.538" x="30.769" y="205.177" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="30.769" y="207.725" />
+ <rect fill="grey" height="5.217" stroke-width="0" width="11.538" x="30.769" y="211.487" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="30.769" y="216.704" />
+ <rect fill="black" height="6.067" stroke-width="0" width="11.538" x="30.769" y="223.256" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="30.769" y="229.323" />
+ <rect fill="black" height="6.552" stroke-width="0" width="11.538" x="30.769" y="233.084" />
+ <rect fill="white" height="9.464" stroke-width="0" width="11.538" x="30.769" y="239.636" />
+ <rect fill="LightGrey" height="5.096" stroke-width="0" width="11.538" x="30.769" y="249.100" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="30.769" y="254.196" />
+ <rect fill="black" height="12.133" stroke-width="0" width="11.538" x="30.769" y="257.351" />
+ <rect fill="white" height="0.728" stroke-width="0" width="11.538" x="30.769" y="269.485" />
+ <rect fill="LightGrey" height="2.912" stroke-width="0" width="11.538" x="30.769" y="270.213" />
+ <rect fill="white" height="4.489" stroke-width="0" width="11.538" x="30.769" y="273.125" />
+ <rect fill="grey" height="4.732" stroke-width="0" width="11.538" x="30.769" y="277.614" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="30.769" y="282.346" />
+ <rect fill="rgb(169,169,169)" height="8.615" stroke-width="0" width="11.538" x="30.769" y="284.651" />
+ <rect fill="white" height="6.734" stroke-width="0" width="11.538" x="30.769" y="293.266" />
+ <rect fill="none" height="150.819" rx="10" ry="10" width="11.538" x="30.769" y="0.000" />
+ <rect fill="none" height="149.181" rx="10" ry="10" width="11.538" x="30.769" y="150.819" />
+ </g>
+ <text x="32.538" y="315.000">1
+ </text>
+ </g>
+ <g id="chr2">
+ <rect fill="" height="1" stroke="" width="11" x="67" y="37" />
+ <rect fill="" height="1" stroke="" width="11" x="67" y="106" />
+ <clipPath id="chr2_clipPath">
+ <rect height="113.205" rx="10" ry="10" width="11.538" x="61.538" y="5.216" />
+ <rect height="181.579" rx="10" ry="10" width="11.538" x="61.538" y="118.421" />
+ </clipPath>
+ <g clip-path="url(#chr2_clipPath)">
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="61.538" y="5.216" />
+ <rect fill="grey" height="3.276" stroke-width="0" width="11.538" x="61.538" y="10.433" />
+ <rect fill="white" height="7.037" stroke-width="0" width="11.538" x="61.538" y="13.709" />
+ <rect fill="rgb(169,169,169)" height="5.096" stroke-width="0" width="11.538" x="61.538" y="20.747" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="61.538" y="25.843" />
+ <rect fill="rgb(169,169,169)" height="5.824" stroke-width="0" width="11.538" x="61.538" y="28.391" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="61.538" y="34.215" />
+ <rect fill="LightGrey" height="2.548" stroke-width="0" width="11.538" x="61.538" y="38.825" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="61.538" y="41.373" />
+ <rect fill="rgb(169,169,169)" height="5.460" stroke-width="0" width="11.538" x="61.538" y="43.921" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="61.538" y="49.382" />
+ <rect fill="grey" height="3.883" stroke-width="0" width="11.538" x="61.538" y="51.808" />
+ <rect fill="white" height="7.280" stroke-width="0" width="11.538" x="61.538" y="55.691" />
+ <rect fill="black" height="6.188" stroke-width="0" width="11.538" x="61.538" y="62.971" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="61.538" y="69.159" />
+ <rect fill="black" height="7.644" stroke-width="0" width="11.538" x="61.538" y="71.707" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="61.538" y="79.351" />
+ <rect fill="grey" height="7.887" stroke-width="0" width="11.538" x="61.538" y="82.870" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="61.538" y="90.757" />
+ <rect fill="grey" height="1.577" stroke-width="0" width="11.538" x="61.538" y="93.305" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="61.538" y="94.882" />
+ <rect fill="black" height="10.071" stroke-width="0" width="11.538" x="61.538" y="96.702" />
+ <rect fill="white" height="8.857" stroke-width="0" width="11.538" x="61.538" y="106.773" />
+ <rect fill="DarkGrey" height="2.791" stroke-width="0" width="11.538" x="61.538" y="115.630" />
+ <rect fill="DarkGrey" height="2.912" stroke-width="0" width="11.538" x="61.538" y="118.421" />
+ <rect fill="white" height="7.765" stroke-width="0" width="11.538" x="61.538" y="121.333" />
+ <rect fill="grey" height="3.883" stroke-width="0" width="11.538" x="61.538" y="129.099" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="61.538" y="132.981" />
+ <rect fill="LightGrey" height="2.305" stroke-width="0" width="11.538" x="61.538" y="134.680" />
+ <rect fill="white" height="6.309" stroke-width="0" width="11.538" x="61.538" y="136.985" />
+ <rect fill="grey" height="5.824" stroke-width="0" width="11.538" x="61.538" y="143.295" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="61.538" y="149.119" />
+ <rect fill="grey" height="9.100" stroke-width="0" width="11.538" x="61.538" y="153.365" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="61.538" y="162.466" />
+ <rect fill="LightGrey" height="3.155" stroke-width="0" width="11.538" x="61.538" y="165.620" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="61.538" y="168.775" />
+ <rect fill="black" height="7.037" stroke-width="0" width="11.538" x="61.538" y="170.959" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="61.538" y="177.996" />
+ <rect fill="black" height="4.489" stroke-width="0" width="11.538" x="61.538" y="180.787" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="61.538" y="185.277" />
+ <rect fill="LightGrey" height="0.849" stroke-width="0" width="11.538" x="61.538" y="186.733" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="61.538" y="187.582" />
+ <rect fill="rgb(169,169,169)" height="6.067" stroke-width="0" width="11.538" x="61.538" y="192.799" />
+ <rect fill="white" height="4.732" stroke-width="0" width="11.538" x="61.538" y="198.866" />
+ <rect fill="rgb(169,169,169)" height="7.280" stroke-width="0" width="11.538" x="61.538" y="203.598" />
+ <rect fill="white" height="9.949" stroke-width="0" width="11.538" x="61.538" y="210.878" />
+ <rect fill="grey" height="3.276" stroke-width="0" width="11.538" x="61.538" y="220.828" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="61.538" y="224.104" />
+ <rect fill="rgb(169,169,169)" height="7.765" stroke-width="0" width="11.538" x="61.538" y="226.894" />
+ <rect fill="white" height="3.033" stroke-width="0" width="11.538" x="61.538" y="234.660" />
+ <rect fill="rgb(169,169,169)" height="6.673" stroke-width="0" width="11.538" x="61.538" y="237.693" />
+ <rect fill="white" height="7.765" stroke-width="0" width="11.538" x="61.538" y="244.367" />
+ <rect fill="grey" height="2.548" stroke-width="0" width="11.538" x="61.538" y="252.132" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="61.538" y="254.680" />
+ <rect fill="black" height="7.280" stroke-width="0" width="11.538" x="61.538" y="258.927" />
+ <rect fill="white" height="7.523" stroke-width="0" width="11.538" x="61.538" y="266.207" />
+ <rect fill="rgb(169,169,169)" height="4.368" stroke-width="0" width="11.538" x="61.538" y="273.730" />
+ <rect fill="white" height="1.092" stroke-width="0" width="11.538" x="61.538" y="278.098" />
+ <rect fill="black" height="5.945" stroke-width="0" width="11.538" x="61.538" y="279.190" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="61.538" y="285.135" />
+ <rect fill="grey" height="2.063" stroke-width="0" width="11.538" x="61.538" y="290.716" />
+ <rect fill="white" height="7.221" stroke-width="0" width="11.538" x="61.538" y="292.779" />
+ <rect fill="none" height="113.205" rx="10" ry="10" width="11.538" x="61.538" y="5.216" />
+ <rect fill="none" height="181.579" rx="10" ry="10" width="11.538" x="61.538" y="118.421" />
+ </g>
+ <text x="63.308" y="315.000">2
+ </text>
+ </g>
+ <g id="chr3">
+ <rect fill="" height="1" stroke="" width="11" x="98" y="157" />
+ <clipPath id="chr3_clipPath">
+ <rect height="111.264" rx="10" ry="10" width="11.538" x="92.308" y="57.935" />
+ <rect height="130.801" rx="10" ry="10" width="11.538" x="92.308" y="169.199" />
+ </clipPath>
+ <g clip-path="url(#chr3_clipPath)">
+ <rect fill="grey" height="4.247" stroke-width="0" width="11.538" x="92.308" y="57.935" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="92.308" y="62.182" />
+ <rect fill="grey" height="3.883" stroke-width="0" width="11.538" x="92.308" y="64.608" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="92.308" y="68.491" />
+ <rect fill="LightGrey" height="1.092" stroke-width="0" width="11.538" x="92.308" y="71.888" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="92.308" y="72.980" />
+ <rect fill="black" height="11.041" stroke-width="0" width="11.538" x="92.308" y="75.771" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="92.308" y="86.813" />
+ <rect fill="rgb(169,169,169)" height="5.339" stroke-width="0" width="11.538" x="92.308" y="89.967" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="92.308" y="95.306" />
+ <rect fill="grey" height="5.339" stroke-width="0" width="11.538" x="92.308" y="96.883" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="92.308" y="102.222" />
+ <rect fill="rgb(169,169,169)" height="5.217" stroke-width="0" width="11.538" x="92.308" y="105.619" />
+ <rect fill="white" height="0.971" stroke-width="0" width="11.538" x="92.308" y="110.837" />
+ <rect fill="grey" height="0.364" stroke-width="0" width="11.538" x="92.308" y="111.807" />
+ <rect fill="white" height="8.129" stroke-width="0" width="11.538" x="92.308" y="112.171" />
+ <rect fill="LightGrey" height="0.364" stroke-width="0" width="11.538" x="92.308" y="120.301" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="92.308" y="120.665" />
+ <rect fill="grey" height="4.975" stroke-width="0" width="11.538" x="92.308" y="123.941" />
+ <rect fill="white" height="6.309" stroke-width="0" width="11.538" x="92.308" y="128.916" />
+ <rect fill="grey" height="9.828" stroke-width="0" width="11.538" x="92.308" y="135.225" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="92.308" y="145.053" />
+ <rect fill="rgb(169,169,169)" height="9.221" stroke-width="0" width="11.538" x="92.308" y="147.965" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="92.308" y="157.187" />
+ <rect fill="rgb(169,169,169)" height="4.247" stroke-width="0" width="11.538" x="92.308" y="159.492" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="92.308" y="163.739" />
+ <rect fill="DarkGrey" height="2.791" stroke-width="0" width="11.538" x="92.308" y="166.408" />
+ <rect fill="DarkGrey" height="1.820" stroke-width="0" width="11.538" x="92.308" y="169.199" />
+ <rect fill="LightGrey" height="8.008" stroke-width="0" width="11.538" x="92.308" y="171.019" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="92.308" y="179.027" />
+ <rect fill="LightGrey" height="1.213" stroke-width="0" width="11.538" x="92.308" y="181.090" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="92.308" y="182.303" />
+ <rect fill="rgb(169,169,169)" height="4.125" stroke-width="0" width="11.538" x="92.308" y="184.608" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="92.308" y="188.734" />
+ <rect fill="grey" height="4.004" stroke-width="0" width="11.538" x="92.308" y="190.796" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="92.308" y="194.800" />
+ <rect fill="rgb(169,169,169)" height="4.611" stroke-width="0" width="11.538" x="92.308" y="197.470" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="92.308" y="202.081" />
+ <rect fill="rgb(169,169,169)" height="3.519" stroke-width="0" width="11.538" x="92.308" y="204.143" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="92.308" y="207.662" />
+ <rect fill="LightGrey" height="2.791" stroke-width="0" width="11.538" x="92.308" y="210.089" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="92.308" y="212.879" />
+ <rect fill="LightGrey" height="5.096" stroke-width="0" width="11.538" x="92.308" y="217.490" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="92.308" y="222.586" />
+ <rect fill="LightGrey" height="3.640" stroke-width="0" width="11.538" x="92.308" y="224.649" />
+ <rect fill="white" height="4.853" stroke-width="0" width="11.538" x="92.308" y="228.289" />
+ <rect fill="black" height="7.280" stroke-width="0" width="11.538" x="92.308" y="233.142" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="92.308" y="240.422" />
+ <rect fill="grey" height="3.397" stroke-width="0" width="11.538" x="92.308" y="244.184" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="92.308" y="247.581" />
+ <rect fill="grey" height="2.184" stroke-width="0" width="11.538" x="92.308" y="249.765" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="92.308" y="251.949" />
+ <rect fill="black" height="9.707" stroke-width="0" width="11.538" x="92.308" y="253.527" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="92.308" y="263.233" />
+ <rect fill="rgb(169,169,169)" height="5.824" stroke-width="0" width="11.538" x="92.308" y="267.237" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="92.308" y="273.061" />
+ <rect fill="rgb(169,169,169)" height="4.368" stroke-width="0" width="11.538" x="92.308" y="277.066" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="92.308" y="281.434" />
+ <rect fill="LightGrey" height="1.820" stroke-width="0" width="11.538" x="92.308" y="283.618" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="92.308" y="285.438" />
+ <rect fill="rgb(169,169,169)" height="5.339" stroke-width="0" width="11.538" x="92.308" y="287.743" />
+ <rect fill="white" height="6.918" stroke-width="0" width="11.538" x="92.308" y="293.082" />
+ <rect fill="none" height="111.264" rx="10" ry="10" width="11.538" x="92.308" y="57.935" />
+ <rect fill="none" height="130.801" rx="10" ry="10" width="11.538" x="92.308" y="169.199" />
+ </g>
+ <text x="94.077" y="315.000">3
+ </text>
+ </g>
+ <g id="chr4">
+ <rect fill="" height="1" stroke="" width="11" x="129" y="98" />
+ <rect fill="" height="1" stroke="" width="11" x="129" y="161" />
+ <rect fill="" height="1" stroke="" width="11" x="129" y="270" />
+ <clipPath id="chr4_clipPath">
+ <rect height="61.517" rx="10" ry="10" width="11.538" x="123.077" y="67.919" />
+ <rect height="170.564" rx="10" ry="10" width="11.538" x="123.077" y="129.436" />
+ </clipPath>
+ <g clip-path="url(#chr4_clipPath)">
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="123.077" y="67.919" />
+ <rect fill="LightGrey" height="2.548" stroke-width="0" width="11.538" x="123.077" y="71.681" />
+ <rect fill="white" height="6.916" stroke-width="0" width="11.538" x="123.077" y="74.229" />
+ <rect fill="grey" height="5.339" stroke-width="0" width="11.538" x="123.077" y="81.145" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="123.077" y="86.483" />
+ <rect fill="rgb(169,169,169)" height="5.581" stroke-width="0" width="11.538" x="123.077" y="90.366" />
+ <rect fill="white" height="5.824" stroke-width="0" width="11.538" x="123.077" y="95.948" />
+ <rect fill="black" height="9.221" stroke-width="0" width="11.538" x="123.077" y="101.772" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="123.077" y="110.993" />
+ <rect fill="grey" height="5.703" stroke-width="0" width="11.538" x="123.077" y="117.545" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="123.077" y="123.248" />
+ <rect fill="DarkGrey" height="2.427" stroke-width="0" width="11.538" x="123.077" y="127.009" />
+ <rect fill="DarkGrey" height="2.063" stroke-width="0" width="11.538" x="123.077" y="129.436" />
+ <rect fill="white" height="8.251" stroke-width="0" width="11.538" x="123.077" y="131.499" />
+ <rect fill="black" height="8.615" stroke-width="0" width="11.538" x="123.077" y="139.749" />
+ <rect fill="white" height="4.975" stroke-width="0" width="11.538" x="123.077" y="148.364" />
+ <rect fill="rgb(169,169,169)" height="7.401" stroke-width="0" width="11.538" x="123.077" y="153.339" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="123.077" y="160.740" />
+ <rect fill="grey" height="4.125" stroke-width="0" width="11.538" x="123.077" y="164.016" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="123.077" y="168.142" />
+ <rect fill="LightGrey" height="3.397" stroke-width="0" width="11.538" x="123.077" y="170.204" />
+ <rect fill="white" height="1.335" stroke-width="0" width="11.538" x="123.077" y="173.602" />
+ <rect fill="rgb(169,169,169)" height="7.037" stroke-width="0" width="11.538" x="123.077" y="174.936" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="123.077" y="181.974" />
+ <rect fill="rgb(169,169,169)" height="4.489" stroke-width="0" width="11.538" x="123.077" y="183.673" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="123.077" y="188.162" />
+ <rect fill="grey" height="6.552" stroke-width="0" width="11.538" x="123.077" y="192.287" />
+ <rect fill="white" height="7.523" stroke-width="0" width="11.538" x="123.077" y="198.839" />
+ <rect fill="rgb(169,169,169)" height="7.887" stroke-width="0" width="11.538" x="123.077" y="206.362" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="123.077" y="214.249" />
+ <rect fill="grey" height="6.188" stroke-width="0" width="11.538" x="123.077" y="218.374" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="123.077" y="224.562" />
+ <rect fill="black" height="9.949" stroke-width="0" width="11.538" x="123.077" y="227.232" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="123.077" y="237.181" />
+ <rect fill="LightGrey" height="4.004" stroke-width="0" width="11.538" x="123.077" y="239.851" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="123.077" y="243.855" />
+ <rect fill="LightGrey" height="4.004" stroke-width="0" width="11.538" x="123.077" y="247.131" />
+ <rect fill="white" height="4.975" stroke-width="0" width="11.538" x="123.077" y="251.135" />
+ <rect fill="black" height="7.765" stroke-width="0" width="11.538" x="123.077" y="256.109" />
+ <rect fill="white" height="3.640" stroke-width="0" width="11.538" x="123.077" y="263.875" />
+ <rect fill="black" height="7.159" stroke-width="0" width="11.538" x="123.077" y="267.515" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="123.077" y="274.674" />
+ <rect fill="rgb(169,169,169)" height="5.339" stroke-width="0" width="11.538" x="123.077" y="276.858" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="123.077" y="282.196" />
+ <rect fill="black" height="5.824" stroke-width="0" width="11.538" x="123.077" y="283.652" />
+ <rect fill="white" height="5.703" stroke-width="0" width="11.538" x="123.077" y="289.477" />
+ <rect fill="LightGrey" height="4.821" stroke-width="0" width="11.538" x="123.077" y="295.179" />
+ <rect fill="none" height="61.517" rx="10" ry="10" width="11.538" x="123.077" y="67.919" />
+ <rect fill="none" height="170.564" rx="10" ry="10" width="11.538" x="123.077" y="129.436" />
+ </g>
+ <text x="124.846" y="315.000">4
+ </text>
+ </g>
+ <g id="chr5">
+ <rect fill="" height="1" stroke="" width="11" x="160" y="271" />
+ <clipPath id="chr5_clipPath">
+ <rect height="57.877" rx="10" ry="10" width="11.538" x="153.846" y="80.556" />
+ <rect height="161.567" rx="10" ry="10" width="11.538" x="153.846" y="138.433" />
+ </clipPath>
+ <g clip-path="url(#chr5_clipPath)">
+ <rect fill="white" height="5.339" stroke-width="0" width="11.538" x="153.846" y="80.556" />
+ <rect fill="LightGrey" height="1.941" stroke-width="0" width="11.538" x="153.846" y="85.895" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="153.846" y="87.837" />
+ <rect fill="grey" height="8.372" stroke-width="0" width="11.538" x="153.846" y="90.506" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="153.846" y="98.878" />
+ <rect fill="black" height="5.824" stroke-width="0" width="11.538" x="153.846" y="103.003" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="153.846" y="108.827" />
+ <rect fill="black" height="5.581" stroke-width="0" width="11.538" x="153.846" y="110.526" />
+ <rect fill="white" height="6.188" stroke-width="0" width="11.538" x="153.846" y="116.108" />
+ <rect fill="LightGrey" height="4.975" stroke-width="0" width="11.538" x="153.846" y="122.296" />
+ <rect fill="white" height="4.732" stroke-width="0" width="11.538" x="153.846" y="127.270" />
+ <rect fill="grey" height="4.125" stroke-width="0" width="11.538" x="153.846" y="132.002" />
+ <rect fill="DarkGrey" height="2.305" stroke-width="0" width="11.538" x="153.846" y="136.128" />
+ <rect fill="DarkGrey" height="3.397" stroke-width="0" width="11.538" x="153.846" y="138.433" />
+ <rect fill="white" height="10.192" stroke-width="0" width="11.538" x="153.846" y="141.831" />
+ <rect fill="rgb(169,169,169)" height="4.975" stroke-width="0" width="11.538" x="153.846" y="152.023" />
+ <rect fill="white" height="0.849" stroke-width="0" width="11.538" x="153.846" y="156.997" />
+ <rect fill="rgb(169,169,169)" height="3.397" stroke-width="0" width="11.538" x="153.846" y="157.847" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="153.846" y="161.244" />
+ <rect fill="grey" height="5.945" stroke-width="0" width="11.538" x="153.846" y="163.549" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="153.846" y="169.495" />
+ <rect fill="grey" height="5.945" stroke-width="0" width="11.538" x="153.846" y="173.256" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="153.846" y="179.202" />
+ <rect fill="black" height="11.041" stroke-width="0" width="11.538" x="153.846" y="181.022" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="153.846" y="192.063" />
+ <rect fill="black" height="6.673" stroke-width="0" width="11.538" x="153.846" y="198.615" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="153.846" y="205.289" />
+ <rect fill="black" height="6.188" stroke-width="0" width="11.538" x="153.846" y="207.351" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="153.846" y="213.539" />
+ <rect fill="grey" height="1.941" stroke-width="0" width="11.538" x="153.846" y="215.845" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="153.846" y="217.786" />
+ <rect fill="black" height="7.644" stroke-width="0" width="11.538" x="153.846" y="220.334" />
+ <rect fill="white" height="7.037" stroke-width="0" width="11.538" x="153.846" y="227.978" />
+ <rect fill="black" height="3.761" stroke-width="0" width="11.538" x="153.846" y="235.016" />
+ <rect fill="white" height="6.067" stroke-width="0" width="11.538" x="153.846" y="238.777" />
+ <rect fill="LightGrey" height="4.368" stroke-width="0" width="11.538" x="153.846" y="244.844" />
+ <rect fill="white" height="4.975" stroke-width="0" width="11.538" x="153.846" y="249.212" />
+ <rect fill="rgb(169,169,169)" height="4.975" stroke-width="0" width="11.538" x="153.846" y="254.187" />
+ <rect fill="white" height="5.945" stroke-width="0" width="11.538" x="153.846" y="259.161" />
+ <rect fill="grey" height="4.247" stroke-width="0" width="11.538" x="153.846" y="265.107" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="153.846" y="269.353" />
+ <rect fill="black" height="9.100" stroke-width="0" width="11.538" x="153.846" y="274.571" />
+ <rect fill="white" height="5.824" stroke-width="0" width="11.538" x="153.846" y="283.671" />
+ <rect fill="LightGrey" height="5.217" stroke-width="0" width="11.538" x="153.846" y="289.495" />
+ <rect fill="white" height="5.288" stroke-width="0" width="11.538" x="153.846" y="294.712" />
+ <rect fill="none" height="57.877" rx="10" ry="10" width="11.538" x="153.846" y="80.556" />
+ <rect fill="none" height="161.567" rx="10" ry="10" width="11.538" x="153.846" y="138.433" />
+ </g>
+ <text x="155.615" y="315.000">5
+ </text>
+ </g>
+ <g id="chr6">
+ <clipPath id="chr6_clipPath">
+ <rect height="73.408" rx="10" ry="10" width="11.538" x="184.615" y="92.639" />
+ <rect height="133.954" rx="10" ry="10" width="11.538" x="184.615" y="166.046" />
+ </clipPath>
+ <g clip-path="url(#chr6_clipPath)">
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="184.615" y="92.639" />
+ <rect fill="LightGrey" height="2.184" stroke-width="0" width="11.538" x="184.615" y="95.430" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="184.615" y="97.614" />
+ <rect fill="grey" height="4.368" stroke-width="0" width="11.538" x="184.615" y="101.132" />
+ <rect fill="white" height="0.728" stroke-width="0" width="11.538" x="184.615" y="105.500" />
+ <rect fill="LightGrey" height="2.791" stroke-width="0" width="11.538" x="184.615" y="106.228" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="184.615" y="109.019" />
+ <rect fill="rgb(169,169,169)" height="9.707" stroke-width="0" width="11.538" x="184.615" y="111.446" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="184.615" y="121.152" />
+ <rect fill="grey" height="4.611" stroke-width="0" width="11.538" x="184.615" y="124.307" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="184.615" y="128.918" />
+ <rect fill="LightGrey" height="2.063" stroke-width="0" width="11.538" x="184.615" y="131.345" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="184.615" y="133.407" />
+ <rect fill="LightGrey" height="4.611" stroke-width="0" width="11.538" x="184.615" y="137.290" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="184.615" y="141.901" />
+ <rect fill="black" height="7.159" stroke-width="0" width="11.538" x="184.615" y="147.482" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="184.615" y="154.641" />
+ <rect fill="black" height="5.581" stroke-width="0" width="11.538" x="184.615" y="156.461" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="184.615" y="162.042" />
+ <rect fill="DarkGrey" height="2.548" stroke-width="0" width="11.538" x="184.615" y="163.498" />
+ <rect fill="DarkGrey" height="3.519" stroke-width="0" width="11.538" x="184.615" y="166.046" />
+ <rect fill="white" height="0.121" stroke-width="0" width="11.538" x="184.615" y="169.565" />
+ <rect fill="black" height="7.887" stroke-width="0" width="11.538" x="184.615" y="169.686" />
+ <rect fill="white" height="7.159" stroke-width="0" width="11.538" x="184.615" y="177.573" />
+ <rect fill="grey" height="9.707" stroke-width="0" width="11.538" x="184.615" y="184.732" />
+ <rect fill="white" height="0.971" stroke-width="0" width="11.538" x="184.615" y="194.439" />
+ <rect fill="grey" height="3.397" stroke-width="0" width="11.538" x="184.615" y="195.409" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="184.615" y="198.807" />
+ <rect fill="black" height="8.008" stroke-width="0" width="11.538" x="184.615" y="204.388" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="184.615" y="212.396" />
+ <rect fill="black" height="5.945" stroke-width="0" width="11.538" x="184.615" y="213.852" />
+ <rect fill="white" height="11.041" stroke-width="0" width="11.538" x="184.615" y="219.798" />
+ <rect fill="rgb(169,169,169)" height="3.883" stroke-width="0" width="11.538" x="184.615" y="230.839" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="184.615" y="234.722" />
+ <rect fill="black" height="9.221" stroke-width="0" width="11.538" x="184.615" y="236.542" />
+ <rect fill="white" height="1.335" stroke-width="0" width="11.538" x="184.615" y="245.763" />
+ <rect fill="rgb(169,169,169)" height="3.761" stroke-width="0" width="11.538" x="184.615" y="247.098" />
+ <rect fill="white" height="1.092" stroke-width="0" width="11.538" x="184.615" y="250.859" />
+ <rect fill="grey" height="4.732" stroke-width="0" width="11.538" x="184.615" y="251.951" />
+ <rect fill="white" height="4.732" stroke-width="0" width="11.538" x="184.615" y="256.683" />
+ <rect fill="rgb(169,169,169)" height="4.611" stroke-width="0" width="11.538" x="184.615" y="261.416" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="184.615" y="266.026" />
+ <rect fill="rgb(169,169,169)" height="4.125" stroke-width="0" width="11.538" x="184.615" y="269.424" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="184.615" y="273.549" />
+ <rect fill="grey" height="3.640" stroke-width="0" width="11.538" x="184.615" y="277.796" />
+ <rect fill="white" height="6.431" stroke-width="0" width="11.538" x="184.615" y="281.436" />
+ <rect fill="grey" height="4.247" stroke-width="0" width="11.538" x="184.615" y="287.867" />
+ <rect fill="white" height="7.887" stroke-width="0" width="11.538" x="184.615" y="292.113" />
+ <rect fill="none" height="73.408" rx="10" ry="10" width="11.538" x="184.615" y="92.639" />
+ <rect fill="none" height="133.954" rx="10" ry="10" width="11.538" x="184.615" y="166.046" />
+ </g>
+ <text x="186.385" y="315.000">6
+ </text>
+ </g>
+ <g id="chr7">
+ <clipPath id="chr7_clipPath">
+ <rect height="71.709" rx="10" ry="10" width="11.538" x="215.385" y="107.294" />
+ <rect height="120.997" rx="10" ry="10" width="11.538" x="215.385" y="179.003" />
+ </clipPath>
+ <g clip-path="url(#chr7_clipPath)">
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="215.385" y="107.294" />
+ <rect fill="LightGrey" height="2.912" stroke-width="0" width="11.538" x="215.385" y="109.842" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="215.385" y="112.754" />
+ <rect fill="black" height="7.401" stroke-width="0" width="11.538" x="215.385" y="116.030" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="215.385" y="123.432" />
+ <rect fill="black" height="5.217" stroke-width="0" width="11.538" x="215.385" y="125.737" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="215.385" y="130.955" />
+ <rect fill="grey" height="3.761" stroke-width="0" width="11.538" x="215.385" y="137.507" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="215.385" y="141.268" />
+ <rect fill="rgb(169,169,169)" height="4.611" stroke-width="0" width="11.538" x="215.385" y="145.879" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="215.385" y="150.490" />
+ <rect fill="rgb(169,169,169)" height="7.037" stroke-width="0" width="11.538" x="215.385" y="152.795" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="215.385" y="159.832" />
+ <rect fill="rgb(169,169,169)" height="3.883" stroke-width="0" width="11.538" x="215.385" y="163.836" />
+ <rect fill="white" height="1.335" stroke-width="0" width="11.538" x="215.385" y="167.719" />
+ <rect fill="rgb(169,169,169)" height="3.640" stroke-width="0" width="11.538" x="215.385" y="169.054" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="215.385" y="172.694" />
+ <rect fill="DarkGrey" height="2.063" stroke-width="0" width="11.538" x="215.385" y="176.940" />
+ <rect fill="DarkGrey" height="2.427" stroke-width="0" width="11.538" x="215.385" y="179.003" />
+ <rect fill="white" height="6.067" stroke-width="0" width="11.538" x="215.385" y="181.430" />
+ <rect fill="grey" height="6.916" stroke-width="0" width="11.538" x="215.385" y="187.497" />
+ <rect fill="white" height="6.795" stroke-width="0" width="11.538" x="215.385" y="194.413" />
+ <rect fill="black" height="10.677" stroke-width="0" width="11.538" x="215.385" y="201.207" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="215.385" y="211.885" />
+ <rect fill="rgb(169,169,169)" height="3.519" stroke-width="0" width="11.538" x="215.385" y="214.069" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="215.385" y="217.588" />
+ <rect fill="rgb(169,169,169)" height="6.431" stroke-width="0" width="11.538" x="215.385" y="219.650" />
+ <rect fill="white" height="7.887" stroke-width="0" width="11.538" x="215.385" y="226.081" />
+ <rect fill="grey" height="1.820" stroke-width="0" width="11.538" x="215.385" y="233.968" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="215.385" y="235.788" />
+ <rect fill="rgb(169,169,169)" height="8.736" stroke-width="0" width="11.538" x="215.385" y="237.365" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="215.385" y="246.101" />
+ <rect fill="rgb(169,169,169)" height="4.489" stroke-width="0" width="11.538" x="215.385" y="249.499" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="215.385" y="253.988" />
+ <rect fill="rgb(169,169,169)" height="4.004" stroke-width="0" width="11.538" x="215.385" y="257.264" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="215.385" y="261.268" />
+ <rect fill="LightGrey" height="1.335" stroke-width="0" width="11.538" x="215.385" y="263.816" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="215.385" y="265.151" />
+ <rect fill="grey" height="5.945" stroke-width="0" width="11.538" x="215.385" y="267.942" />
+ <rect fill="white" height="6.673" stroke-width="0" width="11.538" x="215.385" y="273.887" />
+ <rect fill="rgb(169,169,169)" height="5.703" stroke-width="0" width="11.538" x="215.385" y="280.560" />
+ <rect fill="white" height="5.703" stroke-width="0" width="11.538" x="215.385" y="286.263" />
+ <rect fill="LightGrey" height="3.033" stroke-width="0" width="11.538" x="215.385" y="291.966" />
+ <rect fill="white" height="5.001" stroke-width="0" width="11.538" x="215.385" y="294.999" />
+ <rect fill="none" height="71.709" rx="10" ry="10" width="11.538" x="215.385" y="107.294" />
+ <rect fill="none" height="120.997" rx="10" ry="10" width="11.538" x="215.385" y="179.003" />
+ </g>
+ <text x="217.154" y="315.000">7
+ </text>
+ </g>
+ <g id="chr8">
+ <rect fill="" height="1" stroke="" width="11" x="252" y="205" />
+ <clipPath id="chr8_clipPath">
+ <rect height="54.843" rx="10" ry="10" width="11.538" x="246.154" y="122.518" />
+ <rect height="122.639" rx="10" ry="10" width="11.538" x="246.154" y="177.361" />
+ </clipPath>
+ <g clip-path="url(#chr8_clipPath)">
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="246.154" y="122.518" />
+ <rect fill="rgb(169,169,169)" height="4.853" stroke-width="0" width="11.538" x="246.154" y="125.187" />
+ <rect fill="white" height="7.887" stroke-width="0" width="11.538" x="246.154" y="130.040" />
+ <rect fill="black" height="7.765" stroke-width="0" width="11.538" x="246.154" y="137.927" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="246.154" y="145.693" />
+ <rect fill="grey" height="4.853" stroke-width="0" width="11.538" x="246.154" y="150.910" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="246.154" y="155.763" />
+ <rect fill="rgb(169,169,169)" height="10.677" stroke-width="0" width="11.538" x="246.154" y="158.554" />
+ <rect fill="white" height="1.213" stroke-width="0" width="11.538" x="246.154" y="169.232" />
+ <rect fill="LightGrey" height="0.485" stroke-width="0" width="11.538" x="246.154" y="170.445" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="246.154" y="170.930" />
+ <rect fill="DarkGrey" height="2.427" stroke-width="0" width="11.538" x="246.154" y="174.934" />
+ <rect fill="DarkGrey" height="3.519" stroke-width="0" width="11.538" x="246.154" y="177.361" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="246.154" y="180.880" />
+ <rect fill="rgb(169,169,169)" height="2.912" stroke-width="0" width="11.538" x="246.154" y="183.670" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="246.154" y="186.582" />
+ <rect fill="grey" height="7.401" stroke-width="0" width="11.538" x="246.154" y="189.980" />
+ <rect fill="white" height="0.849" stroke-width="0" width="11.538" x="246.154" y="197.381" />
+ <rect fill="grey" height="4.489" stroke-width="0" width="11.538" x="246.154" y="198.231" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="246.154" y="202.720" />
+ <rect fill="grey" height="3.033" stroke-width="0" width="11.538" x="246.154" y="205.147" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="246.154" y="208.180" />
+ <rect fill="black" height="5.460" stroke-width="0" width="11.538" x="246.154" y="212.305" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="246.154" y="217.766" />
+ <rect fill="rgb(169,169,169)" height="5.581" stroke-width="0" width="11.538" x="246.154" y="219.950" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="246.154" y="225.531" />
+ <rect fill="black" height="7.644" stroke-width="0" width="11.538" x="246.154" y="228.322" />
+ <rect fill="white" height="6.795" stroke-width="0" width="11.538" x="246.154" y="235.966" />
+ <rect fill="LightGrey" height="3.033" stroke-width="0" width="11.538" x="246.154" y="242.761" />
+ <rect fill="white" height="5.460" stroke-width="0" width="11.538" x="246.154" y="245.794" />
+ <rect fill="rgb(169,169,169)" height="5.460" stroke-width="0" width="11.538" x="246.154" y="251.254" />
+ <rect fill="white" height="1.941" stroke-width="0" width="11.538" x="246.154" y="256.714" />
+ <rect fill="black" height="6.673" stroke-width="0" width="11.538" x="246.154" y="258.655" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="246.154" y="265.329" />
+ <rect fill="grey" height="4.004" stroke-width="0" width="11.538" x="246.154" y="267.149" />
+ <rect fill="white" height="5.824" stroke-width="0" width="11.538" x="246.154" y="271.153" />
+ <rect fill="grey" height="5.096" stroke-width="0" width="11.538" x="246.154" y="276.977" />
+ <rect fill="white" height="6.067" stroke-width="0" width="11.538" x="246.154" y="282.073" />
+ <rect fill="rgb(169,169,169)" height="4.247" stroke-width="0" width="11.538" x="246.154" y="288.140" />
+ <rect fill="white" height="7.614" stroke-width="0" width="11.538" x="246.154" y="292.386" />
+ <rect fill="none" height="54.843" rx="10" ry="10" width="11.538" x="246.154" y="122.518" />
+ <rect fill="none" height="122.639" rx="10" ry="10" width="11.538" x="246.154" y="177.361" />
+ </g>
+ <text x="247.923" y="315.000">8
+ </text>
+ </g>
+ <g id="chr9">
+ <rect fill="" height="1" stroke="" width="11" x="283" y="248" />
+ <rect fill="" height="1" stroke="" width="11" x="283" y="217" />
+ <clipPath id="chr9_clipPath">
+ <rect height="62.851" rx="10" ry="10" width="11.538" x="276.923" y="129.800" />
+ <rect height="107.349" rx="10" ry="10" width="11.538" x="276.923" y="192.651" />
+ </clipPath>
+ <g clip-path="url(#chr9_clipPath)">
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="276.923" y="129.800" />
+ <rect fill="LightGrey" height="2.912" stroke-width="0" width="11.538" x="276.923" y="132.469" />
+ <rect fill="white" height="5.339" stroke-width="0" width="11.538" x="276.923" y="135.381" />
+ <rect fill="rgb(169,169,169)" height="6.188" stroke-width="0" width="11.538" x="276.923" y="140.720" />
+ <rect fill="white" height="3.033" stroke-width="0" width="11.538" x="276.923" y="146.908" />
+ <rect fill="LightGrey" height="2.305" stroke-width="0" width="11.538" x="276.923" y="149.941" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="276.923" y="152.247" />
+ <rect fill="black" height="6.795" stroke-width="0" width="11.538" x="276.923" y="153.945" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="276.923" y="160.740" />
+ <rect fill="black" height="5.703" stroke-width="0" width="11.538" x="276.923" y="163.895" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="276.923" y="169.598" />
+ <rect fill="LightGrey" height="2.063" stroke-width="0" width="11.538" x="276.923" y="173.844" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="276.923" y="175.907" />
+ <rect fill="grey" height="2.669" stroke-width="0" width="11.538" x="276.923" y="178.576" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="276.923" y="181.246" />
+ <rect fill="DarkGrey" height="6.188" stroke-width="0" width="11.538" x="276.923" y="186.463" />
+ <rect fill="DarkGrey" height="10.313" stroke-width="0" width="11.538" x="276.923" y="192.651" />
+ <rect fill="LightGrey" height="11.769" stroke-width="0" width="11.538" x="276.923" y="202.965" />
+ <rect fill="white" height="0.607" stroke-width="0" width="11.538" x="276.923" y="214.734" />
+ <rect fill="LightGrey" height="2.669" stroke-width="0" width="11.538" x="276.923" y="215.341" />
+ <rect fill="white" height="0.485" stroke-width="0" width="11.538" x="276.923" y="218.010" />
+ <rect fill="grey" height="7.523" stroke-width="0" width="11.538" x="276.923" y="218.495" />
+ <rect fill="white" height="1.213" stroke-width="0" width="11.538" x="276.923" y="226.018" />
+ <rect fill="grey" height="3.761" stroke-width="0" width="11.538" x="276.923" y="227.232" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="276.923" y="230.993" />
+ <rect fill="grey" height="4.247" stroke-width="0" width="11.538" x="276.923" y="234.269" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="276.923" y="238.516" />
+ <rect fill="LightGrey" height="2.427" stroke-width="0" width="11.538" x="276.923" y="240.214" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="276.923" y="242.641" />
+ <rect fill="LightGrey" height="3.155" stroke-width="0" width="11.538" x="276.923" y="245.796" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="276.923" y="248.950" />
+ <rect fill="black" height="6.795" stroke-width="0" width="11.538" x="276.923" y="253.076" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="276.923" y="259.871" />
+ <rect fill="LightGrey" height="4.368" stroke-width="0" width="11.538" x="276.923" y="263.632" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="276.923" y="268.000" />
+ <rect fill="rgb(169,169,169)" height="6.431" stroke-width="0" width="11.538" x="276.923" y="271.397" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="276.923" y="277.828" />
+ <rect fill="LightGrey" height="4.247" stroke-width="0" width="11.538" x="276.923" y="282.439" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="276.923" y="286.686" />
+ <rect fill="LightGrey" height="0.364" stroke-width="0" width="11.538" x="276.923" y="290.568" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="276.923" y="290.932" />
+ <rect fill="LightGrey" height="2.063" stroke-width="0" width="11.538" x="276.923" y="293.480" />
+ <rect fill="white" height="4.457" stroke-width="0" width="11.538" x="276.923" y="295.543" />
+ <rect fill="none" height="62.851" rx="10" ry="10" width="11.538" x="276.923" y="129.800" />
+ <rect fill="none" height="107.349" rx="10" ry="10" width="11.538" x="276.923" y="192.651" />
+ </g>
+ <text x="278.692" y="315.000">9
+ </text>
+ </g>
+ <g id="chr10">
+ <clipPath id="chr10_clipPath">
+ <rect height="48.898" rx="10" ry="10" width="11.538" x="307.692" y="135.743" />
+ <rect height="115.359" rx="10" ry="10" width="11.538" x="307.692" y="184.641" />
+ </clipPath>
+ <g clip-path="url(#chr10_clipPath)">
+ <rect fill="white" height="3.640" stroke-width="0" width="11.538" x="307.692" y="135.743" />
+ <rect fill="LightGrey" height="0.971" stroke-width="0" width="11.538" x="307.692" y="139.383" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="307.692" y="140.354" />
+ <rect fill="rgb(169,169,169)" height="6.795" stroke-width="0" width="11.538" x="307.692" y="143.873" />
+ <rect fill="white" height="6.067" stroke-width="0" width="11.538" x="307.692" y="150.667" />
+ <rect fill="rgb(169,169,169)" height="3.155" stroke-width="0" width="11.538" x="307.692" y="156.734" />
+ <rect fill="white" height="0.728" stroke-width="0" width="11.538" x="307.692" y="159.889" />
+ <rect fill="rgb(169,169,169)" height="2.791" stroke-width="0" width="11.538" x="307.692" y="160.617" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="307.692" y="163.408" />
+ <rect fill="grey" height="5.096" stroke-width="0" width="11.538" x="307.692" y="164.985" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="307.692" y="170.081" />
+ <rect fill="LightGrey" height="3.761" stroke-width="0" width="11.538" x="307.692" y="173.842" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="307.692" y="177.604" />
+ <rect fill="DarkGrey" height="1.820" stroke-width="0" width="11.538" x="307.692" y="182.821" />
+ <rect fill="DarkGrey" height="2.184" stroke-width="0" width="11.538" x="307.692" y="184.641" />
+ <rect fill="white" height="4.853" stroke-width="0" width="11.538" x="307.692" y="186.825" />
+ <rect fill="LightGrey" height="4.853" stroke-width="0" width="11.538" x="307.692" y="191.679" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="307.692" y="196.532" />
+ <rect fill="black" height="9.585" stroke-width="0" width="11.538" x="307.692" y="200.415" />
+ <rect fill="white" height="4.368" stroke-width="0" width="11.538" x="307.692" y="210.000" />
+ <rect fill="black" height="7.887" stroke-width="0" width="11.538" x="307.692" y="214.368" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="307.692" y="222.255" />
+ <rect fill="grey" height="3.397" stroke-width="0" width="11.538" x="307.692" y="226.259" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="307.692" y="229.656" />
+ <rect fill="black" height="7.159" stroke-width="0" width="11.538" x="307.692" y="235.238" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="307.692" y="242.397" />
+ <rect fill="rgb(169,169,169)" height="4.004" stroke-width="0" width="11.538" x="307.692" y="244.459" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="307.692" y="248.463" />
+ <rect fill="grey" height="4.611" stroke-width="0" width="11.538" x="307.692" y="250.041" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="307.692" y="254.651" />
+ <rect fill="grey" height="3.155" stroke-width="0" width="11.538" x="307.692" y="256.350" />
+ <rect fill="white" height="1.213" stroke-width="0" width="11.538" x="307.692" y="259.505" />
+ <rect fill="LightGrey" height="2.305" stroke-width="0" width="11.538" x="307.692" y="260.718" />
+ <rect fill="white" height="0.971" stroke-width="0" width="11.538" x="307.692" y="263.024" />
+ <rect fill="black" height="7.401" stroke-width="0" width="11.538" x="307.692" y="263.994" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="307.692" y="271.396" />
+ <rect fill="rgb(169,169,169)" height="5.096" stroke-width="0" width="11.538" x="307.692" y="275.157" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="307.692" y="280.253" />
+ <rect fill="grey" height="1.699" stroke-width="0" width="11.538" x="307.692" y="283.408" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="307.692" y="285.106" />
+ <rect fill="grey" height="3.761" stroke-width="0" width="11.538" x="307.692" y="290.324" />
+ <rect fill="white" height="5.915" stroke-width="0" width="11.538" x="307.692" y="294.085" />
+ <rect fill="none" height="48.898" rx="10" ry="10" width="11.538" x="307.692" y="135.743" />
+ <rect fill="none" height="115.359" rx="10" ry="10" width="11.538" x="307.692" y="184.641" />
+ </g>
+ <text x="305.462" y="315.000">10
+ </text>
+ </g>
+ <g id="chr11">
+ <rect fill="" height="1" stroke="" width="11" x="344" y="213" />
+ <clipPath id="chr11_clipPath">
+ <rect height="64.186" rx="10" ry="10" width="11.538" x="338.462" y="136.862" />
+ <rect height="98.951" rx="10" ry="10" width="11.538" x="338.462" y="201.049" />
+ </clipPath>
+ <g clip-path="url(#chr11_clipPath)">
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="338.462" y="136.862" />
+ <rect fill="grey" height="9.585" stroke-width="0" width="11.538" x="338.462" y="140.260" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="338.462" y="149.845" />
+ <rect fill="grey" height="4.247" stroke-width="0" width="11.538" x="338.462" y="152.151" />
+ <rect fill="white" height="6.673" stroke-width="0" width="11.538" x="338.462" y="156.397" />
+ <rect fill="black" height="5.339" stroke-width="0" width="11.538" x="338.462" y="163.071" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="338.462" y="168.409" />
+ <rect fill="rgb(169,169,169)" height="4.611" stroke-width="0" width="11.538" x="338.462" y="169.866" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="338.462" y="174.476" />
+ <rect fill="black" height="8.493" stroke-width="0" width="11.538" x="338.462" y="181.028" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="338.462" y="189.522" />
+ <rect fill="rgb(169,169,169)" height="3.155" stroke-width="0" width="11.538" x="338.462" y="196.074" />
+ <rect fill="DarkGrey" height="1.820" stroke-width="0" width="11.538" x="338.462" y="199.229" />
+ <rect fill="DarkGrey" height="4.247" stroke-width="0" width="11.538" x="338.462" y="201.049" />
+ <rect fill="rgb(169,169,169)" height="4.004" stroke-width="0" width="11.538" x="338.462" y="205.295" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="338.462" y="209.299" />
+ <rect fill="LightGrey" height="2.063" stroke-width="0" width="11.538" x="338.462" y="211.362" />
+ <rect fill="white" height="4.853" stroke-width="0" width="11.538" x="338.462" y="213.425" />
+ <rect fill="LightGrey" height="2.548" stroke-width="0" width="11.538" x="338.462" y="218.278" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="338.462" y="220.826" />
+ <rect fill="grey" height="5.096" stroke-width="0" width="11.538" x="338.462" y="222.646" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="338.462" y="227.742" />
+ <rect fill="black" height="10.435" stroke-width="0" width="11.538" x="338.462" y="229.926" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="338.462" y="240.361" />
+ <rect fill="black" height="5.339" stroke-width="0" width="11.538" x="338.462" y="243.516" />
+ <rect fill="white" height="5.339" stroke-width="0" width="11.538" x="338.462" y="248.854" />
+ <rect fill="black" height="5.945" stroke-width="0" width="11.538" x="338.462" y="254.193" />
+ <rect fill="white" height="0.971" stroke-width="0" width="11.538" x="338.462" y="260.139" />
+ <rect fill="black" height="9.221" stroke-width="0" width="11.538" x="338.462" y="261.109" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="338.462" y="270.331" />
+ <rect fill="grey" height="3.155" stroke-width="0" width="11.538" x="338.462" y="273.728" />
+ <rect fill="white" height="6.431" stroke-width="0" width="11.538" x="338.462" y="276.883" />
+ <rect fill="grey" height="3.397" stroke-width="0" width="11.538" x="338.462" y="283.314" />
+ <rect fill="white" height="4.732" stroke-width="0" width="11.538" x="338.462" y="286.711" />
+ <rect fill="grey" height="3.519" stroke-width="0" width="11.538" x="338.462" y="291.443" />
+ <rect fill="white" height="5.038" stroke-width="0" width="11.538" x="338.462" y="294.962" />
+ <rect fill="none" height="64.186" rx="10" ry="10" width="11.538" x="338.462" y="136.862" />
+ <rect fill="none" height="98.951" rx="10" ry="10" width="11.538" x="338.462" y="201.049" />
+ </g>
+ <text x="336.231" y="315.000">11
+ </text>
+ </g>
+ <g id="chr12">
+ <clipPath id="chr12_clipPath">
+ <rect height="42.953" rx="10" ry="10" width="11.538" x="369.231" y="139.414" />
+ <rect height="117.634" rx="10" ry="10" width="11.538" x="369.231" y="182.366" />
+ </clipPath>
+ <g clip-path="url(#chr12_clipPath)">
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="369.231" y="139.414" />
+ <rect fill="LightGrey" height="2.669" stroke-width="0" width="11.538" x="369.231" y="143.175" />
+ <rect fill="white" height="5.703" stroke-width="0" width="11.538" x="369.231" y="145.845" />
+ <rect fill="rgb(169,169,169)" height="3.155" stroke-width="0" width="11.538" x="369.231" y="151.547" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="369.231" y="154.702" />
+ <rect fill="black" height="6.188" stroke-width="0" width="11.538" x="369.231" y="157.371" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="369.231" y="163.560" />
+ <rect fill="black" height="6.188" stroke-width="0" width="11.538" x="369.231" y="165.137" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="369.231" y="171.325" />
+ <rect fill="grey" height="3.519" stroke-width="0" width="11.538" x="369.231" y="173.024" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="369.231" y="176.542" />
+ <rect fill="DarkGrey" height="2.669" stroke-width="0" width="11.538" x="369.231" y="179.697" />
+ <rect fill="DarkGrey" height="1.335" stroke-width="0" width="11.538" x="369.231" y="182.366" />
+ <rect fill="black" height="9.828" stroke-width="0" width="11.538" x="369.231" y="183.701" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="369.231" y="193.529" />
+ <rect fill="LightGrey" height="1.213" stroke-width="0" width="11.538" x="369.231" y="196.927" />
+ <rect fill="white" height="5.703" stroke-width="0" width="11.538" x="369.231" y="198.140" />
+ <rect fill="LightGrey" height="2.548" stroke-width="0" width="11.538" x="369.231" y="203.843" />
+ <rect fill="white" height="1.335" stroke-width="0" width="11.538" x="369.231" y="206.391" />
+ <rect fill="rgb(169,169,169)" height="6.188" stroke-width="0" width="11.538" x="369.231" y="207.725" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="369.231" y="213.914" />
+ <rect fill="grey" height="3.155" stroke-width="0" width="11.538" x="369.231" y="216.340" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="369.231" y="219.495" />
+ <rect fill="rgb(169,169,169)" height="5.217" stroke-width="0" width="11.538" x="369.231" y="224.106" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="369.231" y="229.323" />
+ <rect fill="black" height="7.765" stroke-width="0" width="11.538" x="369.231" y="234.904" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="369.231" y="242.670" />
+ <rect fill="black" height="4.489" stroke-width="0" width="11.538" x="369.231" y="245.582" />
+ <rect fill="white" height="4.368" stroke-width="0" width="11.538" x="369.231" y="250.071" />
+ <rect fill="rgb(169,169,169)" height="6.309" stroke-width="0" width="11.538" x="369.231" y="254.439" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="369.231" y="260.749" />
+ <rect fill="grey" height="6.188" stroke-width="0" width="11.538" x="369.231" y="263.661" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="369.231" y="269.849" />
+ <rect fill="LightGrey" height="0.728" stroke-width="0" width="11.538" x="369.231" y="273.125" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="369.231" y="273.853" />
+ <rect fill="grey" height="3.033" stroke-width="0" width="11.538" x="369.231" y="276.280" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="369.231" y="279.313" />
+ <rect fill="grey" height="2.912" stroke-width="0" width="11.538" x="369.231" y="281.012" />
+ <rect fill="white" height="6.552" stroke-width="0" width="11.538" x="369.231" y="283.924" />
+ <rect fill="grey" height="5.096" stroke-width="0" width="11.538" x="369.231" y="290.476" />
+ <rect fill="white" height="4.428" stroke-width="0" width="11.538" x="369.231" y="295.572" />
+ <rect fill="none" height="42.953" rx="10" ry="10" width="11.538" x="369.231" y="139.414" />
+ <rect fill="none" height="117.634" rx="10" ry="10" width="11.538" x="369.231" y="182.366" />
+ </g>
+ <text x="367.000" y="315.000">12
+ </text>
+ </g>
+ <g id="chr13">
+ <rect fill="" height="1" stroke="" width="11" x="406" y="239" />
+ <rect fill="" height="1" stroke="" width="11" x="406" y="217" />
+ <clipPath id="chr13_clipPath">
+ <rect height="19.414" rx="10" ry="10" width="11.538" x="400.000" y="161.505" />
+ <rect height="119.082" rx="10" ry="10" width="11.538" x="400.000" y="180.918" />
+ </clipPath>
+ <g clip-path="url(#chr13_clipPath)">
+ <rect fill="LightGrey" height="4.611" stroke-width="0" width="11.538" x="400.000" y="161.505" />
+ <rect fill="rgb(169,169,169)" height="5.460" stroke-width="0" width="11.538" x="400.000" y="166.116" />
+ <rect fill="LightGrey" height="6.309" stroke-width="0" width="11.538" x="400.000" y="171.576" />
+ <rect fill="DarkGrey" height="3.033" stroke-width="0" width="11.538" x="400.000" y="177.885" />
+ <rect fill="DarkGrey" height="2.912" stroke-width="0" width="11.538" x="400.000" y="180.918" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="400.000" y="183.830" />
+ <rect fill="LightGrey" height="2.669" stroke-width="0" width="11.538" x="400.000" y="188.441" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="400.000" y="191.111" />
+ <rect fill="LightGrey" height="1.335" stroke-width="0" width="11.538" x="400.000" y="193.901" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="400.000" y="195.236" />
+ <rect fill="grey" height="2.184" stroke-width="0" width="11.538" x="400.000" y="199.240" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="400.000" y="201.424" />
+ <rect fill="rgb(169,169,169)" height="5.824" stroke-width="0" width="11.538" x="400.000" y="203.608" />
+ <rect fill="white" height="5.824" stroke-width="0" width="11.538" x="400.000" y="209.432" />
+ <rect fill="LightGrey" height="1.941" stroke-width="0" width="11.538" x="400.000" y="215.256" />
+ <rect fill="white" height="0.364" stroke-width="0" width="11.538" x="400.000" y="217.198" />
+ <rect fill="grey" height="3.276" stroke-width="0" width="11.538" x="400.000" y="217.562" />
+ <rect fill="white" height="4.004" stroke-width="0" width="11.538" x="400.000" y="220.838" />
+ <rect fill="black" height="6.552" stroke-width="0" width="11.538" x="400.000" y="224.842" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="400.000" y="231.394" />
+ <rect fill="rgb(169,169,169)" height="4.368" stroke-width="0" width="11.538" x="400.000" y="234.912" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="400.000" y="239.280" />
+ <rect fill="black" height="5.945" stroke-width="0" width="11.538" x="400.000" y="243.042" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="400.000" y="248.987" />
+ <rect fill="grey" height="2.184" stroke-width="0" width="11.538" x="400.000" y="251.535" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="400.000" y="253.719" />
+ <rect fill="black" height="10.556" stroke-width="0" width="11.538" x="400.000" y="255.903" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="400.000" y="266.459" />
+ <rect fill="black" height="6.067" stroke-width="0" width="11.538" x="400.000" y="269.250" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="400.000" y="275.317" />
+ <rect fill="LightGrey" height="1.335" stroke-width="0" width="11.538" x="400.000" y="279.200" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="400.000" y="280.534" />
+ <rect fill="black" height="3.883" stroke-width="0" width="11.538" x="400.000" y="283.446" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="400.000" y="287.329" />
+ <rect fill="black" height="4.004" stroke-width="0" width="11.538" x="400.000" y="289.877" />
+ <rect fill="white" height="6.119" stroke-width="0" width="11.538" x="400.000" y="293.881" />
+ <rect fill="none" height="19.414" rx="10" ry="10" width="11.538" x="400.000" y="161.505" />
+ <rect fill="none" height="119.082" rx="10" ry="10" width="11.538" x="400.000" y="180.918" />
+ </g>
+ <text x="397.769" y="315.000">13
+ </text>
+ </g>
+ <g id="chr14">
+ <rect fill="" height="1" stroke="" width="11" x="437" y="256" />
+ <clipPath id="chr14_clipPath">
+ <rect height="18.928" rx="10" ry="10" width="11.538" x="430.769" y="170.938" />
+ <rect height="110.134" rx="10" ry="10" width="11.538" x="430.769" y="189.866" />
+ </clipPath>
+ <g clip-path="url(#chr14_clipPath)">
+ <rect fill="LightGrey" height="3.761" stroke-width="0" width="11.538" x="430.769" y="170.938" />
+ <rect fill="rgb(169,169,169)" height="4.368" stroke-width="0" width="11.538" x="430.769" y="174.699" />
+ <rect fill="LightGrey" height="8.372" stroke-width="0" width="11.538" x="430.769" y="179.067" />
+ <rect fill="DarkGrey" height="2.427" stroke-width="0" width="11.538" x="430.769" y="187.439" />
+ <rect fill="DarkGrey" height="4.247" stroke-width="0" width="11.538" x="430.769" y="189.866" />
+ <rect fill="white" height="5.460" stroke-width="0" width="11.538" x="430.769" y="194.113" />
+ <rect fill="black" height="9.949" stroke-width="0" width="11.538" x="430.769" y="199.573" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="430.769" y="209.522" />
+ <rect fill="grey" height="1.820" stroke-width="0" width="11.538" x="430.769" y="212.313" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="430.769" y="214.133" />
+ <rect fill="black" height="4.975" stroke-width="0" width="11.538" x="430.769" y="215.710" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="430.769" y="220.685" />
+ <rect fill="black" height="6.188" stroke-width="0" width="11.538" x="430.769" y="223.355" />
+ <rect fill="white" height="4.853" stroke-width="0" width="11.538" x="430.769" y="229.543" />
+ <rect fill="LightGrey" height="2.548" stroke-width="0" width="11.538" x="430.769" y="234.396" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="430.769" y="236.944" />
+ <rect fill="rgb(169,169,169)" height="6.552" stroke-width="0" width="11.538" x="430.769" y="238.643" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="430.769" y="245.195" />
+ <rect fill="grey" height="3.640" stroke-width="0" width="11.538" x="430.769" y="248.592" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="430.769" y="252.232" />
+ <rect fill="grey" height="4.368" stroke-width="0" width="11.538" x="430.769" y="255.023" />
+ <rect fill="white" height="6.673" stroke-width="0" width="11.538" x="430.769" y="259.391" />
+ <rect fill="black" height="5.096" stroke-width="0" width="11.538" x="430.769" y="266.064" />
+ <rect fill="white" height="1.699" stroke-width="0" width="11.538" x="430.769" y="271.160" />
+ <rect fill="black" height="5.945" stroke-width="0" width="11.538" x="430.769" y="272.859" />
+ <rect fill="white" height="1.941" stroke-width="0" width="11.538" x="430.769" y="278.805" />
+ <rect fill="LightGrey" height="2.791" stroke-width="0" width="11.538" x="430.769" y="280.746" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="430.769" y="283.537" />
+ <rect fill="grey" height="6.067" stroke-width="0" width="11.538" x="430.769" y="286.691" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="430.769" y="292.758" />
+ <rect fill="grey" height="0.971" stroke-width="0" width="11.538" x="430.769" y="294.942" />
+ <rect fill="white" height="4.087" stroke-width="0" width="11.538" x="430.769" y="295.913" />
+ <rect fill="none" height="18.928" rx="10" ry="10" width="11.538" x="430.769" y="170.938" />
+ <rect fill="none" height="110.134" rx="10" ry="10" width="11.538" x="430.769" y="189.866" />
+ </g>
+ <text x="428.538" y="315.000">14
+ </text>
+ </g>
+ <g id="chr15">
+ <rect fill="" height="1" stroke="" width="11" x="467" y="272" />
+ <clipPath id="chr15_clipPath">
+ <rect height="20.627" rx="10" ry="10" width="11.538" x="461.538" y="178.254" />
+ <rect height="101.119" rx="10" ry="10" width="11.538" x="461.538" y="198.881" />
+ </clipPath>
+ <g clip-path="url(#chr15_clipPath)">
+ <rect fill="LightGrey" height="4.247" stroke-width="0" width="11.538" x="461.538" y="178.254" />
+ <rect fill="rgb(169,169,169)" height="5.339" stroke-width="0" width="11.538" x="461.538" y="182.501" />
+ <rect fill="LightGrey" height="7.523" stroke-width="0" width="11.538" x="461.538" y="187.839" />
+ <rect fill="DarkGrey" height="3.519" stroke-width="0" width="11.538" x="461.538" y="195.362" />
+ <rect fill="DarkGrey" height="1.699" stroke-width="0" width="11.538" x="461.538" y="198.881" />
+ <rect fill="white" height="5.945" stroke-width="0" width="11.538" x="461.538" y="200.580" />
+ <rect fill="grey" height="2.912" stroke-width="0" width="11.538" x="461.538" y="206.525" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="461.538" y="209.437" />
+ <rect fill="grey" height="1.213" stroke-width="0" width="11.538" x="461.538" y="212.228" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="461.538" y="213.441" />
+ <rect fill="rgb(169,169,169)" height="7.887" stroke-width="0" width="11.538" x="461.538" y="216.353" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="461.538" y="224.240" />
+ <rect fill="LightGrey" height="0.849" stroke-width="0" width="11.538" x="461.538" y="227.637" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="461.538" y="228.487" />
+ <rect fill="rgb(169,169,169)" height="5.945" stroke-width="0" width="11.538" x="461.538" y="230.064" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="461.538" y="236.009" />
+ <rect fill="rgb(169,169,169)" height="5.703" stroke-width="0" width="11.538" x="461.538" y="240.256" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="461.538" y="245.959" />
+ <rect fill="LightGrey" height="5.339" stroke-width="0" width="11.538" x="461.538" y="247.536" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="461.538" y="252.875" />
+ <rect fill="LightGrey" height="0.121" stroke-width="0" width="11.538" x="461.538" y="257.000" />
+ <rect fill="white" height="0.364" stroke-width="0" width="11.538" x="461.538" y="257.122" />
+ <rect fill="LightGrey" height="6.188" stroke-width="0" width="11.538" x="461.538" y="257.486" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="461.538" y="263.674" />
+ <rect fill="LightGrey" height="1.577" stroke-width="0" width="11.538" x="461.538" y="266.950" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="461.538" y="268.527" />
+ <rect fill="grey" height="4.125" stroke-width="0" width="11.538" x="461.538" y="270.590" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="461.538" y="274.715" />
+ <rect fill="grey" height="4.732" stroke-width="0" width="11.538" x="461.538" y="278.962" />
+ <rect fill="white" height="6.309" stroke-width="0" width="11.538" x="461.538" y="283.694" />
+ <rect fill="grey" height="5.096" stroke-width="0" width="11.538" x="461.538" y="290.003" />
+ <rect fill="white" height="4.901" stroke-width="0" width="11.538" x="461.538" y="295.099" />
+ <rect fill="none" height="20.627" rx="10" ry="10" width="11.538" x="461.538" y="178.254" />
+ <rect fill="none" height="101.119" rx="10" ry="10" width="11.538" x="461.538" y="198.881" />
+ </g>
+ <text x="459.308" y="315.000">15
+ </text>
+ </g>
+ <g id="chr16">
+ <rect fill="" height="1" stroke="" width="11" x="498" y="279" />
+ <clipPath id="chr16_clipPath">
+ <rect height="46.350" rx="10" ry="10" width="11.538" x="492.308" y="192.222" />
+ <rect height="61.428" rx="10" ry="10" width="11.538" x="492.308" y="238.572" />
+ </clipPath>
+ <g clip-path="url(#chr16_clipPath)">
+ <rect fill="white" height="7.644" stroke-width="0" width="11.538" x="492.308" y="192.222" />
+ <rect fill="grey" height="4.853" stroke-width="0" width="11.538" x="492.308" y="199.866" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="492.308" y="204.719" />
+ <rect fill="grey" height="2.669" stroke-width="0" width="11.538" x="492.308" y="207.388" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="492.308" y="210.058" />
+ <rect fill="grey" height="4.611" stroke-width="0" width="11.538" x="492.308" y="212.485" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="492.308" y="217.095" />
+ <rect fill="grey" height="7.159" stroke-width="0" width="11.538" x="492.308" y="218.551" />
+ <rect fill="white" height="8.251" stroke-width="0" width="11.538" x="492.308" y="225.710" />
+ <rect fill="DarkGrey" height="4.611" stroke-width="0" width="11.538" x="492.308" y="233.961" />
+ <rect fill="DarkGrey" height="3.033" stroke-width="0" width="11.538" x="492.308" y="238.572" />
+ <rect fill="LightGrey" height="5.824" stroke-width="0" width="11.538" x="492.308" y="241.605" />
+ <rect fill="white" height="6.916" stroke-width="0" width="11.538" x="492.308" y="247.429" />
+ <rect fill="grey" height="4.004" stroke-width="0" width="11.538" x="492.308" y="254.345" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="492.308" y="258.349" />
+ <rect fill="black" height="10.313" stroke-width="0" width="11.538" x="492.308" y="261.018" />
+ <rect fill="white" height="5.096" stroke-width="0" width="11.538" x="492.308" y="271.332" />
+ <rect fill="grey" height="0.485" stroke-width="0" width="11.538" x="492.308" y="276.428" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="492.308" y="276.913" />
+ <rect fill="rgb(169,169,169)" height="5.945" stroke-width="0" width="11.538" x="492.308" y="281.160" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="492.308" y="287.105" />
+ <rect fill="grey" height="2.669" stroke-width="0" width="11.538" x="492.308" y="289.896" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="492.308" y="292.566" />
+ <rect fill="LightGrey" height="1.941" stroke-width="0" width="11.538" x="492.308" y="296.084" />
+ <rect fill="white" height="1.974" stroke-width="0" width="11.538" x="492.308" y="298.026" />
+ <rect fill="none" height="46.350" rx="10" ry="10" width="11.538" x="492.308" y="192.222" />
+ <rect fill="none" height="61.428" rx="10" ry="10" width="11.538" x="492.308" y="238.572" />
+ </g>
+ <text x="490.077" y="315.000">16
+ </text>
+ </g>
+ <g id="chr17">
+ <rect fill="" height="1" stroke="" width="11" x="529" y="252" />
+ <clipPath id="chr17_clipPath">
+ <rect height="26.936" rx="10" ry="10" width="11.538" x="523.077" y="204.419" />
+ <rect height="68.645" rx="10" ry="10" width="11.538" x="523.077" y="231.355" />
+ </clipPath>
+ <g clip-path="url(#chr17_clipPath)">
+ <rect fill="white" height="4.368" stroke-width="0" width="11.538" x="523.077" y="204.419" />
+ <rect fill="grey" height="3.883" stroke-width="0" width="11.538" x="523.077" y="208.787" />
+ <rect fill="white" height="5.339" stroke-width="0" width="11.538" x="523.077" y="212.670" />
+ <rect fill="rgb(169,169,169)" height="5.703" stroke-width="0" width="11.538" x="523.077" y="218.008" />
+ <rect fill="white" height="7.523" stroke-width="0" width="11.538" x="523.077" y="223.711" />
+ <rect fill="DarkGrey" height="0.121" stroke-width="0" width="11.538" x="523.077" y="231.234" />
+ <rect fill="DarkGrey" height="1.213" stroke-width="0" width="11.538" x="523.077" y="231.355" />
+ <rect fill="white" height="6.795" stroke-width="0" width="11.538" x="523.077" y="232.568" />
+ <rect fill="grey" height="8.008" stroke-width="0" width="11.538" x="523.077" y="239.363" />
+ <rect fill="white" height="0.243" stroke-width="0" width="11.538" x="523.077" y="247.371" />
+ <rect fill="LightGrey" height="2.669" stroke-width="0" width="11.538" x="523.077" y="247.614" />
+ <rect fill="white" height="4.975" stroke-width="0" width="11.538" x="523.077" y="250.283" />
+ <rect fill="LightGrey" height="3.519" stroke-width="0" width="11.538" x="523.077" y="255.258" />
+ <rect fill="white" height="3.397" stroke-width="0" width="11.538" x="523.077" y="258.777" />
+ <rect fill="rgb(169,169,169)" height="8.857" stroke-width="0" width="11.538" x="523.077" y="262.174" />
+ <rect fill="white" height="0.849" stroke-width="0" width="11.538" x="523.077" y="271.032" />
+ <rect fill="rgb(169,169,169)" height="3.397" stroke-width="0" width="11.538" x="523.077" y="271.881" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="523.077" y="275.278" />
+ <rect fill="grey" height="2.063" stroke-width="0" width="11.538" x="523.077" y="277.098" />
+ <rect fill="white" height="3.640" stroke-width="0" width="11.538" x="523.077" y="279.161" />
+ <rect fill="rgb(169,169,169)" height="4.611" stroke-width="0" width="11.538" x="523.077" y="282.801" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="523.077" y="287.412" />
+ <rect fill="LightGrey" height="0.849" stroke-width="0" width="11.538" x="523.077" y="292.023" />
+ <rect fill="white" height="7.128" stroke-width="0" width="11.538" x="523.077" y="292.872" />
+ <rect fill="none" height="26.936" rx="10" ry="10" width="11.538" x="523.077" y="204.419" />
+ <rect fill="none" height="68.645" rx="10" ry="10" width="11.538" x="523.077" y="231.355" />
+ </g>
+ <text x="520.846" y="315.000">17
+ </text>
+ </g>
+ <g id="chr18">
+ <clipPath id="chr18_clipPath">
+ <rect height="19.535" rx="10" ry="10" width="11.538" x="553.846" y="207.643" />
+ <rect height="72.822" rx="10" ry="10" width="11.538" x="553.846" y="227.178" />
+ </clipPath>
+ <g clip-path="url(#chr18_clipPath)">
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="553.846" y="207.643" />
+ <rect fill="grey" height="5.217" stroke-width="0" width="11.538" x="553.846" y="211.162" />
+ <rect fill="white" height="1.577" stroke-width="0" width="11.538" x="553.846" y="216.379" />
+ <rect fill="LightGrey" height="2.912" stroke-width="0" width="11.538" x="553.846" y="217.957" />
+ <rect fill="white" height="5.460" stroke-width="0" width="11.538" x="553.846" y="220.869" />
+ <rect fill="DarkGrey" height="0.849" stroke-width="0" width="11.538" x="553.846" y="226.329" />
+ <rect fill="DarkGrey" height="1.456" stroke-width="0" width="11.538" x="553.846" y="227.178" />
+ <rect fill="white" height="7.280" stroke-width="0" width="11.538" x="553.846" y="228.634" />
+ <rect fill="black" height="9.343" stroke-width="0" width="11.538" x="553.846" y="235.914" />
+ <rect fill="white" height="5.460" stroke-width="0" width="11.538" x="553.846" y="245.257" />
+ <rect fill="rgb(169,169,169)" height="7.644" stroke-width="0" width="11.538" x="553.846" y="250.717" />
+ <rect fill="white" height="5.581" stroke-width="0" width="11.538" x="553.846" y="258.361" />
+ <rect fill="rgb(169,169,169)" height="6.795" stroke-width="0" width="11.538" x="553.846" y="263.943" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="553.846" y="270.737" />
+ <rect fill="grey" height="3.276" stroke-width="0" width="11.538" x="553.846" y="273.650" />
+ <rect fill="white" height="3.276" stroke-width="0" width="11.538" x="553.846" y="276.926" />
+ <rect fill="black" height="6.188" stroke-width="0" width="11.538" x="553.846" y="280.202" />
+ <rect fill="white" height="2.427" stroke-width="0" width="11.538" x="553.846" y="286.390" />
+ <rect fill="LightGrey" height="5.339" stroke-width="0" width="11.538" x="553.846" y="288.816" />
+ <rect fill="white" height="5.845" stroke-width="0" width="11.538" x="553.846" y="294.155" />
+ <rect fill="none" height="19.535" rx="10" ry="10" width="11.538" x="553.846" y="207.643" />
+ <rect fill="none" height="72.822" rx="10" ry="10" width="11.538" x="553.846" y="227.178" />
+ </g>
+ <text x="551.615" y="315.000">18
+ </text>
+ </g>
+ <g id="chr19">
+ <clipPath id="chr19_clipPath">
+ <rect height="34.580" rx="10" ry="10" width="11.538" x="584.615" y="222.574" />
+ <rect height="42.845" rx="10" ry="10" width="11.538" x="584.615" y="257.155" />
+ </clipPath>
+ <g clip-path="url(#chr19_clipPath)">
+ <rect fill="white" height="8.372" stroke-width="0" width="11.538" x="584.615" y="222.574" />
+ <rect fill="LightGrey" height="6.916" stroke-width="0" width="11.538" x="584.615" y="230.946" />
+ <rect fill="white" height="1.456" stroke-width="0" width="11.538" x="584.615" y="237.862" />
+ <rect fill="LightGrey" height="2.791" stroke-width="0" width="11.538" x="584.615" y="239.318" />
+ <rect fill="white" height="4.489" stroke-width="0" width="11.538" x="584.615" y="242.109" />
+ <rect fill="LightGrey" height="8.372" stroke-width="0" width="11.538" x="584.615" y="246.599" />
+ <rect fill="DarkGrey" height="2.184" stroke-width="0" width="11.538" x="584.615" y="254.971" />
+ <rect fill="DarkGrey" height="2.063" stroke-width="0" width="11.538" x="584.615" y="257.155" />
+ <rect fill="LightGrey" height="8.372" stroke-width="0" width="11.538" x="584.615" y="259.217" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="584.615" y="267.589" />
+ <rect fill="LightGrey" height="3.276" stroke-width="0" width="11.538" x="584.615" y="271.472" />
+ <rect fill="white" height="0.485" stroke-width="0" width="11.538" x="584.615" y="274.748" />
+ <rect fill="LightGrey" height="5.339" stroke-width="0" width="11.538" x="584.615" y="275.234" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="584.615" y="280.572" />
+ <rect fill="LightGrey" height="4.611" stroke-width="0" width="11.538" x="584.615" y="283.242" />
+ <rect fill="white" height="4.611" stroke-width="0" width="11.538" x="584.615" y="287.852" />
+ <rect fill="LightGrey" height="1.820" stroke-width="0" width="11.538" x="584.615" y="292.463" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="584.615" y="294.283" />
+ <rect fill="LightGrey" height="2.926" stroke-width="0" width="11.538" x="584.615" y="297.074" />
+ <rect fill="none" height="34.580" rx="10" ry="10" width="11.538" x="584.615" y="222.574" />
+ <rect fill="none" height="42.845" rx="10" ry="10" width="11.538" x="584.615" y="257.155" />
+ </g>
+ <text x="582.385" y="315.000">19
+ </text>
+ </g>
+ <g id="chr20">
+ <clipPath id="chr20_clipPath">
+ <rect height="32.882" rx="10" ry="10" width="11.538" x="615.385" y="224.243" />
+ <rect height="42.875" rx="10" ry="10" width="11.538" x="615.385" y="257.125" />
+ </clipPath>
+ <g clip-path="url(#chr20_clipPath)">
+ <rect fill="white" height="6.067" stroke-width="0" width="11.538" x="615.385" y="224.243" />
+ <rect fill="rgb(169,169,169)" height="4.853" stroke-width="0" width="11.538" x="615.385" y="230.310" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="615.385" y="235.164" />
+ <rect fill="rgb(169,169,169)" height="7.159" stroke-width="0" width="11.538" x="615.385" y="238.682" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="615.385" y="245.841" />
+ <rect fill="LightGrey" height="1.335" stroke-width="0" width="11.538" x="615.385" y="249.966" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="615.385" y="251.301" />
+ <rect fill="DarkGrey" height="1.699" stroke-width="0" width="11.538" x="615.385" y="255.426" />
+ <rect fill="DarkGrey" height="1.577" stroke-width="0" width="11.538" x="615.385" y="257.125" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="615.385" y="258.703" />
+ <rect fill="LightGrey" height="2.912" stroke-width="0" width="11.538" x="615.385" y="262.464" />
+ <rect fill="white" height="3.883" stroke-width="0" width="11.538" x="615.385" y="265.376" />
+ <rect fill="rgb(169,169,169)" height="4.853" stroke-width="0" width="11.538" x="615.385" y="269.259" />
+ <rect fill="white" height="0.607" stroke-width="0" width="11.538" x="615.385" y="274.112" />
+ <rect fill="LightGrey" height="5.096" stroke-width="0" width="11.538" x="615.385" y="274.719" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="615.385" y="279.815" />
+ <rect fill="rgb(169,169,169)" height="6.309" stroke-width="0" width="11.538" x="615.385" y="283.940" />
+ <rect fill="white" height="1.820" stroke-width="0" width="11.538" x="615.385" y="290.250" />
+ <rect fill="grey" height="2.427" stroke-width="0" width="11.538" x="615.385" y="292.070" />
+ <rect fill="white" height="5.504" stroke-width="0" width="11.538" x="615.385" y="294.496" />
+ <rect fill="none" height="32.882" rx="10" ry="10" width="11.538" x="615.385" y="224.243" />
+ <rect fill="none" height="42.875" rx="10" ry="10" width="11.538" x="615.385" y="257.125" />
+ </g>
+ <text x="613.154" y="315.000">20
+ </text>
+ </g>
+ <g id="chr21">
+ <clipPath id="chr21_clipPath">
+ <rect height="14.924" rx="10" ry="10" width="11.538" x="646.154" y="243.040" />
+ <rect height="42.036" rx="10" ry="10" width="11.538" x="646.154" y="257.964" />
+ </clipPath>
+ <g clip-path="url(#chr21_clipPath)">
+ <rect fill="LightGrey" height="3.519" stroke-width="0" width="11.538" x="646.154" y="243.040" />
+ <rect fill="rgb(169,169,169)" height="4.125" stroke-width="0" width="11.538" x="646.154" y="246.559" />
+ <rect fill="LightGrey" height="4.489" stroke-width="0" width="11.538" x="646.154" y="250.684" />
+ <rect fill="DarkGrey" height="2.791" stroke-width="0" width="11.538" x="646.154" y="255.174" />
+ <rect fill="DarkGrey" height="1.092" stroke-width="0" width="11.538" x="646.154" y="257.964" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="646.154" y="259.056" />
+ <rect fill="black" height="9.221" stroke-width="0" width="11.538" x="646.154" y="261.604" />
+ <rect fill="white" height="3.519" stroke-width="0" width="11.538" x="646.154" y="270.826" />
+ <rect fill="rgb(169,169,169)" height="5.703" stroke-width="0" width="11.538" x="646.154" y="274.345" />
+ <rect fill="white" height="5.096" stroke-width="0" width="11.538" x="646.154" y="280.047" />
+ <rect fill="grey" height="2.427" stroke-width="0" width="11.538" x="646.154" y="285.143" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="646.154" y="287.570" />
+ <rect fill="grey" height="3.397" stroke-width="0" width="11.538" x="646.154" y="289.875" />
+ <rect fill="white" height="6.727" stroke-width="0" width="11.538" x="646.154" y="293.273" />
+ <rect fill="none" height="14.924" rx="10" ry="10" width="11.538" x="646.154" y="243.040" />
+ <rect fill="none" height="42.036" rx="10" ry="10" width="11.538" x="646.154" y="257.964" />
+ </g>
+ <text x="643.923" y="315.000">21
+ </text>
+ </g>
+ <g id="chr22">
+ <clipPath id="chr22_clipPath">
+ <rect height="14.318" rx="10" ry="10" width="11.538" x="676.923" y="239.707" />
+ <rect height="45.976" rx="10" ry="10" width="11.538" x="676.923" y="254.024" />
+ </clipPath>
+ <g clip-path="url(#chr22_clipPath)">
+ <rect fill="LightGrey" height="3.640" stroke-width="0" width="11.538" x="676.923" y="239.707" />
+ <rect fill="rgb(169,169,169)" height="4.368" stroke-width="0" width="11.538" x="676.923" y="243.347" />
+ <rect fill="LightGrey" height="3.640" stroke-width="0" width="11.538" x="676.923" y="247.715" />
+ <rect fill="DarkGrey" height="2.669" stroke-width="0" width="11.538" x="676.923" y="251.355" />
+ <rect fill="DarkGrey" height="5.460" stroke-width="0" width="11.538" x="676.923" y="254.024" />
+ <rect fill="white" height="5.096" stroke-width="0" width="11.538" x="676.923" y="259.485" />
+ <rect fill="LightGrey" height="1.577" stroke-width="0" width="11.538" x="676.923" y="264.581" />
+ <rect fill="white" height="3.033" stroke-width="0" width="11.538" x="676.923" y="266.158" />
+ <rect fill="grey" height="4.368" stroke-width="0" width="11.538" x="676.923" y="269.191" />
+ <rect fill="white" height="3.155" stroke-width="0" width="11.538" x="676.923" y="273.559" />
+ <rect fill="grey" height="6.552" stroke-width="0" width="11.538" x="676.923" y="276.714" />
+ <rect fill="white" height="4.125" stroke-width="0" width="11.538" x="676.923" y="283.266" />
+ <rect fill="grey" height="4.004" stroke-width="0" width="11.538" x="676.923" y="287.392" />
+ <rect fill="white" height="5.339" stroke-width="0" width="11.538" x="676.923" y="291.396" />
+ <rect fill="grey" height="1.456" stroke-width="0" width="11.538" x="676.923" y="296.734" />
+ <rect fill="white" height="1.810" stroke-width="0" width="11.538" x="676.923" y="298.190" />
+ <rect fill="none" height="14.318" rx="10" ry="10" width="11.538" x="676.923" y="239.707" />
+ <rect fill="none" height="45.976" rx="10" ry="10" width="11.538" x="676.923" y="254.024" />
+ </g>
+ <text x="674.692" y="315.000">22
+ </text>
+ </g>
+ <g id="chrX">
+ <rect fill="" height="1" stroke="" width="11" x="713" y="140" />
+ <rect fill="" height="1" stroke="" width="11" x="713" y="297" />
+ <clipPath id="chrX_clipPath">
+ <rect height="72.194" rx="10" ry="10" width="11.538" x="707.692" y="112.036" />
+ <rect height="115.770" rx="10" ry="10" width="11.538" x="707.692" y="184.230" />
+ </clipPath>
+ <g clip-path="url(#chrX_clipPath)">
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="707.692" y="112.036" />
+ <rect fill="grey" height="2.063" stroke-width="0" width="11.538" x="707.692" y="117.253" />
+ <rect fill="white" height="4.247" stroke-width="0" width="11.538" x="707.692" y="119.316" />
+ <rect fill="grey" height="9.221" stroke-width="0" width="11.538" x="707.692" y="123.562" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="707.692" y="132.784" />
+ <rect fill="grey" height="3.155" stroke-width="0" width="11.538" x="707.692" y="135.332" />
+ <rect fill="white" height="3.761" stroke-width="0" width="11.538" x="707.692" y="138.487" />
+ <rect fill="black" height="5.460" stroke-width="0" width="11.538" x="707.692" y="142.248" />
+ <rect fill="white" height="2.548" stroke-width="0" width="11.538" x="707.692" y="147.708" />
+ <rect fill="black" height="7.280" stroke-width="0" width="11.538" x="707.692" y="150.256" />
+ <rect fill="white" height="5.824" stroke-width="0" width="11.538" x="707.692" y="157.536" />
+ <rect fill="rgb(169,169,169)" height="6.067" stroke-width="0" width="11.538" x="707.692" y="163.360" />
+ <rect fill="white" height="2.912" stroke-width="0" width="11.538" x="707.692" y="169.427" />
+ <rect fill="LightGrey" height="6.067" stroke-width="0" width="11.538" x="707.692" y="172.339" />
+ <rect fill="white" height="2.305" stroke-width="0" width="11.538" x="707.692" y="178.406" />
+ <rect fill="DarkGrey" height="3.519" stroke-width="0" width="11.538" x="707.692" y="180.711" />
+ <rect fill="DarkGrey" height="6.673" stroke-width="0" width="11.538" x="707.692" y="184.230" />
+ <rect fill="white" height="0.121" stroke-width="0" width="11.538" x="707.692" y="190.903" />
+ <rect fill="grey" height="3.155" stroke-width="0" width="11.538" x="707.692" y="191.025" />
+ <rect fill="white" height="5.460" stroke-width="0" width="11.538" x="707.692" y="194.179" />
+ <rect fill="grey" height="1.941" stroke-width="0" width="11.538" x="707.692" y="199.639" />
+ <rect fill="white" height="2.669" stroke-width="0" width="11.538" x="707.692" y="201.581" />
+ <rect fill="black" height="10.313" stroke-width="0" width="11.538" x="707.692" y="204.250" />
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="707.692" y="214.564" />
+ <rect fill="black" height="6.916" stroke-width="0" width="11.538" x="707.692" y="216.626" />
+ <rect fill="white" height="1.941" stroke-width="0" width="11.538" x="707.692" y="223.542" />
+ <rect fill="rgb(169,169,169)" height="5.703" stroke-width="0" width="11.538" x="707.692" y="225.484" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="707.692" y="231.186" />
+ <rect fill="grey" height="1.335" stroke-width="0" width="11.538" x="707.692" y="236.404" />
+ <rect fill="white" height="8.372" stroke-width="0" width="11.538" x="707.692" y="237.739" />
+ <rect fill="rgb(169,169,169)" height="7.644" stroke-width="0" width="11.538" x="707.692" y="246.111" />
+ <rect fill="white" height="4.732" stroke-width="0" width="11.538" x="707.692" y="253.755" />
+ <rect fill="black" height="11.041" stroke-width="0" width="11.538" x="707.692" y="258.487" />
+ <rect fill="white" height="0.607" stroke-width="0" width="11.538" x="707.692" y="269.528" />
+ <rect fill="LightGrey" height="3.883" stroke-width="0" width="11.538" x="707.692" y="270.135" />
+ <rect fill="white" height="5.217" stroke-width="0" width="11.538" x="707.692" y="274.018" />
+ <rect fill="rgb(169,169,169)" height="2.791" stroke-width="0" width="11.538" x="707.692" y="279.235" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="707.692" y="282.026" />
+ <rect fill="black" height="6.067" stroke-width="0" width="11.538" x="707.692" y="284.210" />
+ <rect fill="white" height="9.723" stroke-width="0" width="11.538" x="707.692" y="290.277" />
+ <rect fill="none" height="72.194" rx="10" ry="10" width="11.538" x="707.692" y="112.036" />
+ <rect fill="none" height="115.770" rx="10" ry="10" width="11.538" x="707.692" y="184.230" />
+ </g>
+ <text x="709.462" y="315.000">X
+ </text>
+ </g>
+ <g id="chrY">
+ <clipPath id="chrY_clipPath">
+ <rect height="13.711" rx="10" ry="10" width="11.538" x="738.462" y="229.901" />
+ <rect height="56.388" rx="10" ry="10" width="11.538" x="738.462" y="243.612" />
+ </clipPath>
+ <g clip-path="url(#chrY_clipPath)">
+ <rect fill="white" height="2.063" stroke-width="0" width="11.538" x="738.462" y="229.901" />
+ <rect fill="grey" height="1.941" stroke-width="0" width="11.538" x="738.462" y="231.964" />
+ <rect fill="white" height="9.585" stroke-width="0" width="11.538" x="738.462" y="233.905" />
+ <rect fill="DarkGrey" height="0.121" stroke-width="0" width="11.538" x="738.462" y="243.491" />
+ <rect fill="DarkGrey" height="1.456" stroke-width="0" width="11.538" x="738.462" y="243.612" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="738.462" y="245.068" />
+ <rect fill="grey" height="5.703" stroke-width="0" width="11.538" x="738.462" y="247.252" />
+ <rect fill="white" height="2.791" stroke-width="0" width="11.538" x="738.462" y="252.955" />
+ <rect fill="grey" height="4.975" stroke-width="0" width="11.538" x="738.462" y="255.746" />
+ <rect fill="white" height="2.184" stroke-width="0" width="11.538" x="738.462" y="260.720" />
+ <rect fill="LightGrey" height="37.096" stroke-width="0" width="11.538" x="738.462" y="262.904" />
+ <rect fill="none" height="13.711" rx="10" ry="10" width="11.538" x="738.462" y="229.901" />
+ <rect fill="none" height="56.388" rx="10" ry="10" width="11.538" x="738.462" y="243.612" />
+ </g>
+ <text x="740.231" y="315.000">Y
+ </text>
+ </g>
+ </g><!--
+ Generated using the Perl SVG Module V2.33
+ by Ronan Oger
+ Info: http://www.roasp.com/
+ -->
+</svg>
\ No newline at end of file
--- /dev/null
+%!PS-Adobe-2.0
+%%Creator: gnuplot 4.2 patchlevel 0
+%%CreationDate: Mon Sep 3 10:28:29 2007
+%%DocumentFonts: (atend)
+%%BoundingBox: 50 50 554 770
+%%Orientation: Landscape
+%%Pages: (atend)
+%%EndComments
+%%BeginProlog
+/gnudict 256 dict def % private dictionary (capacity 256) for the prolog's definitions
+gnudict begin % push it on the dictionary stack so the defs below are stored in it
+%
+% The following true/false flags may be edited by hand if required.
+% The unit line width (gnulinewidth) may also be changed.
+%
+/Color false def % false: DL discards the RGB triple and draws with 0 setgray
+/Blacktext false def % true: Lshow/Rshow/Cshow force text to 0 setgray
+/Solid false def % true: DL replaces every dash pattern with [] (solid)
+/Dashlength 1 def % multiplier applied to dash segments by dl1/dl2
+/Landscape true def % not read in the visible part of the prolog; presumably page rotation - TODO confirm
+/Level1 false def % false: the pdfmark DOCINFO block below is executed
+/Rounded false def % true: BL/AL/PL set round joins and caps, dl1/dl2 compensate for them
+/TransparentPatterns false def % not read in the visible part of the prolog - NOTE(review): confirm consumer
+/gnulinewidth 5.000 def % unit line width; UL scales it into userlinewidth
+/userlinewidth gnulinewidth def % working line width used by BL, AL and PL
+%
+/vshift -33 def % vertical offset applied to text by Lshow/Rshow/Cshow
+/dl1 { % n dl1 -> length; used for the odd (first, third, ...) entries of the LT* dash arrays
+ 10.0 Dashlength mul mul % n * 10 * Dashlength
+ Rounded { currentlinewidth 0.75 mul sub dup 0 le { pop 0.01 } if } if % with Rounded: subtract 0.75 * line width, never going to 0 or below (0.01 instead)
+} def
+/dl2 { % n dl2 -> length; used for the even (second, fourth, ...) entries of the LT* dash arrays
+ 10.0 Dashlength mul mul % n * 10 * Dashlength
+ Rounded { currentlinewidth 0.75 mul add } if % with Rounded: add 0.75 * line width
+} def
+/hpt_ 31.5 def % base horizontal point-symbol half-size; UP multiplies it by a scale factor
+/vpt_ 31.5 def % base vertical point-symbol half-size; UP multiplies it by a scale factor
+/hpt hpt_ def % current horizontal half-size used by the symbol procedures
+/vpt vpt_ def % current vertical half-size used by the symbol procedures
+Level1 {} { % nothing to do when Level1 is true; otherwise emit document info via pdfmark
+/SDict 10 dict def % scratch dictionary, only on the dictionary stack for the pdfmark call below
+systemdict /pdfmark known not { % systemdict has no pdfmark operator
+ userdict /pdfmark systemdict /cleartomark get put % define pdfmark as cleartomark so the marked sequence below is simply discarded
+} if
+SDict begin [ % "[" pushes the mark that pdfmark (or cleartomark) consumes down to
+ /Title ()
+ /Subject (gnuplot plot)
+ /Creator (gnuplot 4.2 patchlevel 0)
+ /Author (Martin Hansen)
+% /Producer (gnuplot)
+% /Keywords ()
+ /CreationDate (Mon Sep 3 10:28:29 2007)
+ /DOCINFO pdfmark % hand the key/value pairs above to pdfmark as document info
+end
+} ifelse
+%
+% Gnuplot Prolog Version 4.2 (August 2006)
+%
+/M {moveto} bind def % x y M: absolute move
+/L {lineto} bind def % x y L: absolute line
+/R {rmoveto} bind def % dx dy R: relative move
+/V {rlineto} bind def % dx dy V: relative line
+/N {newpath moveto} bind def % x y N: start a fresh path at x y
+/Z {closepath} bind def % Z: close the current subpath
+/C {setrgbcolor} bind def % r g b C: set the current colour
+/f {rlineto fill} bind def % dx dy f: add a final relative line, then fill the path
+/vpt2 vpt 2 mul def % full symbol height (2 * vpt), evaluated once here; UP recomputes it
+/hpt2 hpt 2 mul def % full symbol width (2 * hpt), evaluated once here; UP recomputes it
+/Lshow {currentpoint stroke M 0 vshift R % string Lshow: stroke the pending path, return to the point, shift by vshift; text starts at the point
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def % Blacktext: show in gray 0 without changing the current colour
+/Rshow {currentpoint stroke M dup stringwidth pop neg vshift R % string Rshow: as Lshow, but first move left by the string width so the text ends at the point
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/Cshow {currentpoint stroke M dup stringwidth pop -2 div vshift R % string Cshow: as Lshow, but first move left by half the string width so the text is centred on the point
+ Blacktext {gsave 0 setgray show grestore} {show} ifelse} def
+/UP {dup vpt_ mul /vpt exch def hpt_ mul /hpt exch def % scale UP: set vpt and hpt to scale * the base sizes vpt_ and hpt_
+ /hpt2 hpt 2 mul def /vpt2 vpt 2 mul def} def % and refresh the doubled sizes
+/DL {Color {setrgbcolor Solid {pop []} if 0 setdash} % dasharray r g b DL; Color true: use the RGB colour, Solid swaps the dash array for []
+ {pop pop pop 0 setgray Solid {pop []} if 0 setdash} ifelse} def % Color false: drop r g b and draw in gray 0; dash offset is always 0
+/BL {stroke userlinewidth 2 mul setlinewidth % BL: stroke the pending path, then select 2 * userlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def % Rounded: round joins and round caps
+/AL {stroke userlinewidth 2 div setlinewidth % AL: stroke the pending path, then select userlinewidth / 2
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+/UL {dup gnulinewidth mul /userlinewidth exch def % scale UL: userlinewidth = scale * gnulinewidth
+ dup 1 lt {pop 1} if 10 mul /udl exch def} def % udl = 10 * scale, using 1 when scale is below 1; read by LTa
+/PL {stroke userlinewidth setlinewidth % PL: stroke the pending path, then select userlinewidth
+ Rounded {1 setlinejoin 1 setlinecap} if} def
+% Default line colors: each LC* pushes an r g b triple (0..1 per component) for DL or setrgbcolor
+/LCw {1 1 1} def % white
+/LCb {0 0 0} def % black, used by LTb
+/LCa {0 0 0} def % black, used by LTa
+/LC0 {1 0 0} def % red
+/LC1 {0 1 0} def % green
+/LC2 {0 0 1} def % blue
+/LC3 {1 0 1} def % magenta
+/LC4 {0 1 1} def % cyan
+/LC5 {1 1 0} def % yellow
+/LC6 {0 0 0} def % black
+/LC7 {1 0.3 0} def % orange
+/LC8 {0.5 0.5 0.5} def % mid grey
+% Default line types: each LT* selects a line width (PL/BL/AL), a dash pattern and a colour
+/LTw {PL [] 1 setgray} def % normal width, gray 1 (white); NOTE(review): the [] is not consumed and stays on the operand stack
+/LTb {BL [] LCb DL} def % double width, solid, LCb colour
+/LTa {AL [1 udl mul 2 udl mul] 0 setdash LCa setrgbcolor} def % half width, dash of 1 and 2 udl units; sets RGB directly, bypassing the Color/Solid handling in DL
+/LT0 {PL [] LC0 DL} def % solid
+/LT1 {PL [4 dl1 2 dl2] LC1 DL} def % dash arrays are built at call time, so dl1/dl2 see the current Dashlength and line width
+/LT2 {PL [2 dl1 3 dl2] LC2 DL} def
+/LT3 {PL [1 dl1 1.5 dl2] LC3 DL} def
+/LT4 {PL [6 dl1 2 dl2 1 dl1 2 dl2] LC4 DL} def
+/LT5 {PL [3 dl1 3 dl2 1 dl1 3 dl2] LC5 DL} def
+/LT6 {PL [2 dl1 2 dl2 2 dl1 6 dl2] LC6 DL} def
+/LT7 {PL [1 dl1 2 dl2 6 dl1 2 dl2 1 dl1 2 dl2] LC7 DL} def
+/LT8 {PL [2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 2 dl2 2 dl1 4 dl2] LC8 DL} def
+/Pnt {stroke [] 0 setdash gsave 1 setlinecap M 0 0 V stroke grestore} def % x y Pnt: dot drawn as a zero-length line with round caps; line cap restored by grestore
+/Dia {stroke [] 0 setdash 2 copy vpt add M % x y Dia: diamond outline starting at the top vertex (x, y+vpt)
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath stroke
+ Pnt} def % the copied x y is consumed by the centre dot
+/Pls {stroke [] 0 setdash vpt sub M 0 vpt2 V % x y Pls: plus sign; vertical bar from (x, y-vpt) upwards
+ currentpoint stroke M
+ hpt neg vpt neg R hpt2 0 V stroke % move from the bar's top to (x-hpt, y), then draw the horizontal bar
+ } def
+/Box {stroke [] 0 setdash 2 copy exch hpt sub exch vpt add M % x y Box: square outline starting at the upper-left corner (x-hpt, y+vpt)
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath stroke
+ Pnt} def % plus centre dot
+/Crs {stroke [] 0 setdash exch hpt sub exch vpt add M % x y Crs: X-shaped cross; first diagonal from the upper-left corner
+ hpt2 vpt2 neg V currentpoint stroke M
+ hpt2 neg 0 R hpt2 vpt2 V stroke} def % second diagonal from the lower-left corner
+/TriU {stroke [] 0 setdash 2 copy vpt 1.12 mul add M % x y TriU: upward triangle outline, apex at (x, y + 1.12*vpt)
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath stroke
+ Pnt} def % plus centre dot
+/Star {2 copy Pls Crs} def % x y Star: plus sign and cross overlaid at the same point
+/BoxF {stroke [] 0 setdash exch hpt sub exch vpt add M % x y BoxF: filled square, same path as Box, no centre dot
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath fill} def
+/TriUF {stroke [] 0 setdash vpt 1.12 mul add M % x y TriUF: filled upward triangle, same path as TriU
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath fill} def
+/TriD {stroke [] 0 setdash 2 copy vpt 1.12 mul sub M % x y TriD: downward triangle outline, apex at (x, y - 1.12*vpt)
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath stroke
+ Pnt} def % plus centre dot
+/TriDF {stroke [] 0 setdash vpt 1.12 mul sub M % x y TriDF: filled downward triangle
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath fill} def
+/DiaF {stroke [] 0 setdash vpt add M % x y DiaF: filled diamond, same path as Dia
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath fill} def
+/Pent {stroke [] 0 setdash 2 copy gsave % x y Pent: pentagon outline with centre dot
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat % origin moved to x y; vertices at distance hpt, 72 degrees apart
+ closepath stroke grestore Pnt} def
+/PentF {stroke [] 0 setdash gsave % x y PentF: filled pentagon, same path as Pent
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath fill grestore} def
+/Circle {stroke [] 0 setdash 2 copy % x y Circle: circle outline of radius hpt with centre dot
+ hpt 0 360 arc stroke Pnt} def
+/CircleF {stroke [] 0 setdash hpt 0 360 arc fill} def % x y CircleF: filled circle of radius hpt
+/C0 {BL [] 0 setdash 2 copy moveto vpt 90 450 arc} bind def % x y Cn: circle of radius vpt with selected quadrants filled; C0 fills none. The circle path is left unpainted - presumably stroked by the caller, TODO confirm
+/C1 {BL [] 0 setdash 2 copy moveto % fills 0-90 degrees; in C1..C15 the bits of n pick quadrants: 1 = 0-90, 2 = 90-180, 4 = 180-270, 8 = 270-360
+ 2 copy vpt 0 90 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C2 {BL [] 0 setdash 2 copy moveto % fills 90-180 degrees
+ 2 copy vpt 90 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C3 {BL [] 0 setdash 2 copy moveto % fills 0-180 degrees
+ 2 copy vpt 0 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C4 {BL [] 0 setdash 2 copy moveto % fills 180-270 degrees
+ 2 copy vpt 180 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C5 {BL [] 0 setdash 2 copy moveto % fills 0-90 and 180-270 degrees
+ 2 copy vpt 0 90 arc
+ 2 copy moveto
+ 2 copy vpt 180 270 arc closepath fill
+ vpt 0 360 arc} bind def
+/C6 {BL [] 0 setdash 2 copy moveto % fills 90-270 degrees
+ 2 copy vpt 90 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C7 {BL [] 0 setdash 2 copy moveto % fills 0-270 degrees
+ 2 copy vpt 0 270 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C8 {BL [] 0 setdash 2 copy moveto % fills 270-360 degrees
+ 2 copy vpt 270 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C9 {BL [] 0 setdash 2 copy moveto % fills 270-450 degrees, i.e. 270-360 plus 0-90
+ 2 copy vpt 270 450 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C10 {BL [] 0 setdash 2 copy 2 copy moveto vpt 270 360 arc closepath fill % fills 270-360 and 90-180 degrees
+ 2 copy moveto
+ 2 copy vpt 90 180 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C11 {BL [] 0 setdash 2 copy moveto % fills 0-180 and 270-360 degrees
+ 2 copy vpt 0 180 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 270 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C12 {BL [] 0 setdash 2 copy moveto % fills 180-360 degrees
+ 2 copy vpt 180 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C13 {BL [] 0 setdash 2 copy moveto % fills 0-90 and 180-360 degrees
+ 2 copy vpt 0 90 arc closepath fill
+ 2 copy moveto
+ 2 copy vpt 180 360 arc closepath fill
+ vpt 0 360 arc closepath} bind def
+/C14 {BL [] 0 setdash 2 copy moveto % fills 90-360 degrees
+ 2 copy vpt 90 360 arc closepath fill
+ vpt 0 360 arc} bind def
+/C15 {BL [] 0 setdash 2 copy vpt 0 360 arc closepath fill % fills the whole circle
+ vpt 0 360 arc closepath} bind def
+/Rec {newpath 4 2 roll moveto 1 index 0 rlineto 0 exch rlineto % x y w h Rec: new rectangle path with corner x y, width w, height h; nothing is painted
+ neg 0 rlineto closepath} bind def
+/Square {dup Rec} bind def % x y s Square: square path with corner x y and side s
+/Bsquare {vpt sub exch vpt sub exch vpt2 Square} bind def % x y Bsquare: square path of side vpt2 centred on x y
+/S0 {BL [] 0 setdash 2 copy moveto 0 vpt rlineto BL Bsquare} bind def % x y Sn: square of side vpt2 centred on x y with selected quadrants filled; S0 fills none and strokes a tick from the centre upwards. The Bsquare outline is left unpainted - presumably stroked by the caller, TODO confirm
+/S1 {BL [] 0 setdash 2 copy vpt Square fill Bsquare} bind def % fills upper right; in S1..S15 the bits of n pick quadrants: 1 = upper right, 2 = upper left, 4 = lower left, 8 = lower right
+/S2 {BL [] 0 setdash 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def % upper left
+/S3 {BL [] 0 setdash 2 copy exch vpt sub exch vpt2 vpt Rec fill Bsquare} bind def % upper half
+/S4 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def % lower left
+/S5 {BL [] 0 setdash 2 copy 2 copy vpt Square fill % upper right ...
+ exch vpt sub exch vpt sub vpt Square fill Bsquare} bind def % ... and lower left
+/S6 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill Bsquare} bind def % left half
+/S7 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt vpt2 Rec fill % left half ...
+ 2 copy vpt Square fill Bsquare} bind def % ... and upper right
+/S8 {BL [] 0 setdash 2 copy vpt sub vpt Square fill Bsquare} bind def % lower right
+/S9 {BL [] 0 setdash 2 copy vpt sub vpt vpt2 Rec fill Bsquare} bind def % right half
+/S10 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt Square fill % lower right and upper left
+ Bsquare} bind def
+/S11 {BL [] 0 setdash 2 copy vpt sub vpt Square fill 2 copy exch vpt sub exch vpt2 vpt Rec fill % lower right and upper half
+ Bsquare} bind def
+/S12 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill Bsquare} bind def % lower half
+/S13 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill % lower half ...
+ 2 copy vpt Square fill Bsquare} bind def % ... and upper right
+/S14 {BL [] 0 setdash 2 copy exch vpt sub exch vpt sub vpt2 vpt Rec fill % lower half ...
+ 2 copy exch vpt sub exch vpt Square fill Bsquare} bind def % ... and upper left
+/S15 {BL [] 0 setdash 2 copy Bsquare fill Bsquare} bind def % whole square
+/D0 {gsave translate 45 rotate 0 0 S0 stroke grestore} bind def % x y Dn: draw symbol Sn rotated 45 degrees about x y, stroke its outline, then restore the graphics state
+/D1 {gsave translate 45 rotate 0 0 S1 stroke grestore} bind def % D1..D15 differ only in which Sn they call
+/D2 {gsave translate 45 rotate 0 0 S2 stroke grestore} bind def
+/D3 {gsave translate 45 rotate 0 0 S3 stroke grestore} bind def
+/D4 {gsave translate 45 rotate 0 0 S4 stroke grestore} bind def
+/D5 {gsave translate 45 rotate 0 0 S5 stroke grestore} bind def
+/D6 {gsave translate 45 rotate 0 0 S6 stroke grestore} bind def
+/D7 {gsave translate 45 rotate 0 0 S7 stroke grestore} bind def
+/D8 {gsave translate 45 rotate 0 0 S8 stroke grestore} bind def
+/D9 {gsave translate 45 rotate 0 0 S9 stroke grestore} bind def
+/D10 {gsave translate 45 rotate 0 0 S10 stroke grestore} bind def
+/D11 {gsave translate 45 rotate 0 0 S11 stroke grestore} bind def
+/D12 {gsave translate 45 rotate 0 0 S12 stroke grestore} bind def
+/D13 {gsave translate 45 rotate 0 0 S13 stroke grestore} bind def
+/D14 {gsave translate 45 rotate 0 0 S14 stroke grestore} bind def
+/D15 {gsave translate 45 rotate 0 0 S15 stroke grestore} bind def
+/DiaE {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V closepath stroke} def
+/BoxE {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V closepath stroke} def
+/TriUE {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V closepath stroke} def
+/TriDE {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V closepath stroke} def
+/PentE {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ closepath stroke grestore} def
+/CircE {stroke [] 0 setdash
+ hpt 0 360 arc stroke} def
+/Opaque {gsave closepath 1 setgray fill grestore 0 setgray closepath} def
+/DiaW {stroke [] 0 setdash vpt add M
+ hpt neg vpt neg V hpt vpt neg V
+ hpt vpt V hpt neg vpt V Opaque stroke} def
+/BoxW {stroke [] 0 setdash exch hpt sub exch vpt add M
+ 0 vpt2 neg V hpt2 0 V 0 vpt2 V
+ hpt2 neg 0 V Opaque stroke} def
+/TriUW {stroke [] 0 setdash vpt 1.12 mul add M
+ hpt neg vpt -1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt 1.62 mul V Opaque stroke} def
+/TriDW {stroke [] 0 setdash vpt 1.12 mul sub M
+ hpt neg vpt 1.62 mul V
+ hpt 2 mul 0 V
+ hpt neg vpt -1.62 mul V Opaque stroke} def
+/PentW {stroke [] 0 setdash gsave
+ translate 0 hpt M 4 {72 rotate 0 hpt L} repeat
+ Opaque stroke grestore} def
+/CircW {stroke [] 0 setdash
+ hpt 0 360 arc Opaque stroke} def
+/BoxFill {gsave Rec 1 setgray fill grestore} def
+/Density {
+ /Fillden exch def
+ currentrgbcolor
+ /ColB exch def /ColG exch def /ColR exch def
+ /ColR ColR Fillden mul Fillden sub 1 add def
+ /ColG ColG Fillden mul Fillden sub 1 add def
+ /ColB ColB Fillden mul Fillden sub 1 add def
+ ColR ColG ColB setrgbcolor} def
+/BoxColFill {gsave Rec PolyFill} def
+/PolyFill {gsave Density fill grestore grestore} def
+/h {rlineto rlineto rlineto gsave fill grestore} bind def
+%
+% PostScript Level 1 Pattern Fill routine for rectangles
+% Usage: x y w h s a XX PatternFill
+% x,y = lower left corner of box to be filled
+% w,h = width and height of box
+% a = angle in degrees between lines and x-axis
+% XX = 0/1 for no/yes cross-hatch
+%
+/PatternFill {gsave /PFa [ 9 2 roll ] def
+ PFa 0 get PFa 2 get 2 div add PFa 1 get PFa 3 get 2 div add translate
+ PFa 2 get -2 div PFa 3 get -2 div PFa 2 get PFa 3 get Rec
+ gsave 1 setgray fill grestore clip
+ currentlinewidth 0.5 mul setlinewidth
+ /PFs PFa 2 get dup mul PFa 3 get dup mul add sqrt def
+ 0 0 M PFa 5 get rotate PFs -2 div dup translate
+ 0 1 PFs PFa 4 get div 1 add floor cvi
+ {PFa 4 get mul 0 M 0 PFs V} for
+ 0 PFa 6 get ne {
+ 0 1 PFs PFa 4 get div 1 add floor cvi
+ {PFa 4 get mul 0 2 1 roll M PFs 0 V} for
+ } if
+ stroke grestore} def
+%
+/languagelevel where
+ {pop languagelevel} {1} ifelse
+ 2 lt
+ {/InterpretLevel1 true def}
+ {/InterpretLevel1 Level1 def}
+ ifelse
+%
+% PostScript level 2 pattern fill definitions
+%
+/Level2PatternFill {
+/Tile8x8 {/PaintType 2 /PatternType 1 /TilingType 1 /BBox [0 0 8 8] /XStep 8 /YStep 8}
+ bind def
+/KeepColor {currentrgbcolor [/Pattern /DeviceRGB] setcolorspace} bind def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke}
+>> matrix makepattern
+/Pat1 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 8 8 L 0 8 M 8 0 L stroke
+ 0 4 M 4 8 L 8 4 L 4 0 L 0 4 L stroke}
+>> matrix makepattern
+/Pat2 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 0 M 0 8 L
+ 8 8 L 8 0 L 0 0 L fill}
+>> matrix makepattern
+/Pat3 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -4 8 M 8 -4 L
+ 0 12 M 12 0 L stroke}
+>> matrix makepattern
+/Pat4 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -4 0 M 8 12 L
+ 0 -4 M 12 8 L stroke}
+>> matrix makepattern
+/Pat5 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -2 8 M 4 -4 L
+ 0 12 M 8 -4 L 4 12 M 10 0 L stroke}
+>> matrix makepattern
+/Pat6 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop -2 0 M 4 12 L
+ 0 -4 M 8 12 L 4 -4 M 10 8 L stroke}
+>> matrix makepattern
+/Pat7 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 8 -2 M -4 4 L
+ 12 0 M -4 8 L 12 4 M 0 10 L stroke}
+>> matrix makepattern
+/Pat8 exch def
+<< Tile8x8
+ /PaintProc {0.5 setlinewidth pop 0 -2 M 12 4 L
+ -4 0 M 12 8 L -4 4 M 8 10 L stroke}
+>> matrix makepattern
+/Pat9 exch def
+/Pattern1 {PatternBgnd KeepColor Pat1 setpattern} bind def
+/Pattern2 {PatternBgnd KeepColor Pat2 setpattern} bind def
+/Pattern3 {PatternBgnd KeepColor Pat3 setpattern} bind def
+/Pattern4 {PatternBgnd KeepColor Landscape {Pat5} {Pat4} ifelse setpattern} bind def
+/Pattern5 {PatternBgnd KeepColor Landscape {Pat4} {Pat5} ifelse setpattern} bind def
+/Pattern6 {PatternBgnd KeepColor Landscape {Pat9} {Pat6} ifelse setpattern} bind def
+/Pattern7 {PatternBgnd KeepColor Landscape {Pat8} {Pat7} ifelse setpattern} bind def
+} def
+%
+%
+%End of PostScript Level 2 code
+%
+/PatternBgnd {
+ TransparentPatterns {} {gsave 1 setgray fill grestore} ifelse
+} def
+%
+% Substitute for Level 2 pattern fill codes with
+% grayscale if Level 2 support is not selected.
+%
+/Level1PatternFill {
+/Pattern1 {0.250 Density} bind def
+/Pattern2 {0.500 Density} bind def
+/Pattern3 {0.750 Density} bind def
+/Pattern4 {0.125 Density} bind def
+/Pattern5 {0.375 Density} bind def
+/Pattern6 {0.625 Density} bind def
+/Pattern7 {0.875 Density} bind def
+} def
+%
+% Now test for support of Level 2 code
+%
+Level1 {Level1PatternFill} {Level2PatternFill} ifelse
+%
+/Symbol-Oblique /Symbol findfont [1 0 .167 1 0 0] makefont
+dup length dict begin {1 index /FID eq {pop pop} {def} ifelse} forall
+currentdict end definefont pop
+end
+%%EndProlog
+%%Page: 1 1
+gnudict begin
+gsave
+50 50 translate
+0.100 0.100 scale
+90 rotate
+0 -5040 translate
+0 setgray
+newpath
+(Helvetica) findfont 100 scalefont setfont
+1.000 UL
+LTb
+410 263 M
+63 0 V
+6557 0 R
+-63 0 V
+350 263 M
+( 0) Rshow
+1.000 UL
+LTb
+410 903 M
+63 0 V
+6557 0 R
+-63 0 V
+350 903 M
+( 20) Rshow
+1.000 UL
+LTb
+410 1542 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 40) Rshow
+1.000 UL
+LTb
+410 2182 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 60) Rshow
+1.000 UL
+LTb
+410 2821 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 80) Rshow
+1.000 UL
+LTb
+410 3461 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 100) Rshow
+1.000 UL
+LTb
+410 4100 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 120) Rshow
+1.000 UL
+LTb
+410 4740 M
+63 0 V
+6557 0 R
+-63 0 V
+-6617 0 R
+( 140) Rshow
+1.000 UL
+LTb
+571 263 M
+0 -63 V
+0 -100 R
+( 0) Cshow
+1.000 UL
+LTb
+1379 263 M
+0 -63 V
+0 -100 R
+( 5) Cshow
+1.000 UL
+LTb
+2186 263 M
+0 -63 V
+0 -100 R
+( 10) Cshow
+1.000 UL
+LTb
+2993 263 M
+0 -63 V
+0 -100 R
+( 15) Cshow
+1.000 UL
+LTb
+3801 263 M
+0 -63 V
+0 -100 R
+( 20) Cshow
+1.000 UL
+LTb
+4608 263 M
+0 -63 V
+0 -100 R
+( 25) Cshow
+1.000 UL
+LTb
+5415 263 M
+0 -63 V
+0 -100 R
+( 30) Cshow
+1.000 UL
+LTb
+6223 263 M
+0 -63 V
+0 -100 R
+( 35) Cshow
+1.000 UL
+LTb
+7030 263 M
+0 -63 V
+0 -100 R
+( 40) Cshow
+1.000 UL
+LTb
+1.000 UL
+LTb
+410 4740 N
+410 263 L
+6620 0 V
+0 4477 V
+-6620 0 V
+Z stroke
+3720 4890 M
+(Length Distribution) Cshow
+1.000 UP
+1.000 UL
+LTb
+1.000 UL
+LT0
+/Helvetica findfont 100 scalefont setfont
+1.000 531 263 82 1 BoxColFill
+531 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 693 263 81 1 BoxColFill
+693 263 N
+80 0 V
+-80 0 V
+Z stroke
+1.000 854 263 82 1 BoxColFill
+854 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1015 263 82 1 BoxColFill
+1015 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1177 263 82 1 BoxColFill
+1177 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1338 263 82 1 BoxColFill
+1338 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1500 263 82 1 BoxColFill
+1500 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1661 263 82 1 BoxColFill
+1661 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1823 263 82 1 BoxColFill
+1823 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 1984 263 82 1 BoxColFill
+1984 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 2146 263 81 1 BoxColFill
+2146 263 N
+80 0 V
+-80 0 V
+Z stroke
+1.000 2307 263 82 1 BoxColFill
+2307 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 2469 263 81 1 BoxColFill
+2469 263 N
+80 0 V
+-80 0 V
+Z stroke
+1.000 2630 263 82 1 BoxColFill
+2630 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 2792 263 81 1 BoxColFill
+2792 263 N
+80 0 V
+-80 0 V
+Z stroke
+1.000 2953 263 82 1 BoxColFill
+2953 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 3115 263 81 1 BoxColFill
+3115 263 N
+80 0 V
+-80 0 V
+Z stroke
+1.000 3276 263 82 1 BoxColFill
+3276 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 3437 263 82 97 BoxColFill
+3437 263 N
+0 96 V
+81 0 V
+0 -96 V
+-81 0 V
+Z stroke
+1.000 3599 263 82 1 BoxColFill
+3599 263 N
+81 0 V
+-81 0 V
+Z stroke
+1.000 3760 263 82 129 BoxColFill
+3760 263 N
+0 128 V
+81 0 V
+0 -128 V
+-81 0 V
+Z stroke
+1.000 3922 263 82 225 BoxColFill
+3922 263 N
+0 224 V
+81 0 V
+0 -224 V
+-81 0 V
+Z stroke
+1.000 4083 263 82 257 BoxColFill
+4083 263 N
+0 256 V
+81 0 V
+0 -256 V
+-81 0 V
+Z stroke
+1.000 4245 263 81 417 BoxColFill
+4245 263 N
+0 416 V
+80 0 V
+0 -416 V
+-80 0 V
+Z stroke
+1.000 4406 263 82 385 BoxColFill
+4406 263 N
+0 384 V
+81 0 V
+0 -384 V
+-81 0 V
+Z stroke
+1.000 4568 263 81 1120 BoxColFill
+4568 263 N
+0 1119 V
+80 0 V
+0 -1119 V
+-80 0 V
+Z stroke
+1.000 4729 263 82 1408 BoxColFill
+4729 263 N
+0 1407 V
+81 0 V
+0 -1407 V
+-81 0 V
+Z stroke
+1.000 4891 263 81 2527 BoxColFill
+4891 263 N
+0 2526 V
+80 0 V
+0 -2526 V
+-80 0 V
+Z stroke
+1.000 5052 263 82 2943 BoxColFill
+5052 263 N
+0 2942 V
+81 0 V
+0 -2942 V
+-81 0 V
+Z stroke
+1.000 5214 263 81 4094 BoxColFill
+5214 263 N
+0 4093 V
+80 0 V
+0 -4093 V
+-80 0 V
+Z stroke
+1.000 5375 263 82 4062 BoxColFill
+5375 263 N
+0 4061 V
+81 0 V
+0 -4061 V
+-81 0 V
+Z stroke
+1.000 5536 263 82 1824 BoxColFill
+5536 263 N
+0 1823 V
+81 0 V
+0 -1823 V
+-81 0 V
+Z stroke
+1.000 5698 263 82 353 BoxColFill
+5698 263 N
+0 352 V
+81 0 V
+0 -352 V
+-81 0 V
+Z stroke
+1.000 5859 263 82 129 BoxColFill
+5859 263 N
+0 128 V
+81 0 V
+0 -128 V
+-81 0 V
+Z stroke
+1.000 6021 263 82 33 BoxColFill
+6021 263 N
+0 32 V
+81 0 V
+0 -32 V
+-81 0 V
+Z stroke
+1.000 6182 263 82 33 BoxColFill
+6182 263 N
+0 32 V
+81 0 V
+0 -32 V
+-81 0 V
+Z stroke
+1.000 6344 263 82 33 BoxColFill
+6344 263 N
+0 32 V
+81 0 V
+0 -32 V
+-81 0 V
+Z stroke
+1.000 6505 263 82 65 BoxColFill
+6505 263 N
+0 64 V
+81 0 V
+0 -64 V
+-81 0 V
+Z stroke
+1.000 UL
+LTb
+410 4740 N
+410 263 L
+6620 0 V
+0 4477 V
+-6620 0 V
+Z stroke
+1.000 UP
+1.000 UL
+LTb
+stroke
+grestore
+end
+showpage
+%%Trailer
+%%DocumentFonts: Helvetica
+%%Pages: 1
--- /dev/null
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<svg height="100%" style="font-weight: normal; font-family: Courier New; font-size: 10" width="100%" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <g transform="translate(0,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(0,29.53125)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(0,29.0625)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(0,28.125)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(7,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(7,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(7,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(7,30)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(7,30)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(14,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0505505023770181)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(14,29.6461464833609)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0505505023770181)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(14,29.2922929667217)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.151651507131054)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(14,28.2307324168044)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.151651507131054)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(21,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(21,29.53125)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(21,29.0625)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(21,28.125)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(28,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(28,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(28,26.4797929667217)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(35,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(35,28.8855732416804)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(35,25.5422929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(42,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(42,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(42,26.4797929667217)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(49,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.334821428571429)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(49,27.65625)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.334821428571429)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(56,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,-0.12506710357733)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(56,30.8754697250413)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,-0.0833780690515533)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(56,31.4591162084022)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(56,31.7509394500826)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(56,32.0427626917631)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(63,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(63,29.53125)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(63,29.0625)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(63,28.125)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(70,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(70,29.53125)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(70,29.0625)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(70,28.125)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(77,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(77,27.2691833309992)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(84,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(84,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(91,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(91,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(98,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.187822783742304)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(98,28.6852405138039)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.375645567484608)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(98,26.0557215414116)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.93911391871152)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(105,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(105,27.2691833309992)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(112,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(112,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(119,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(119,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(126,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(126,27.2691833309992)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(133,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(133,28.8855732416804)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(133,25.5422929667217)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(140,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(140,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(147,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(147,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(154,30)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(154,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(154,26.4797929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(161,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(161,28.8855732416804)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(161,25.5422929667217)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(168,30)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(168,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(168,26.4797929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(175,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(175,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(182,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(182,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(182,30)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(182,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(182,30)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(189,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(189,27.2691833309992)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(196,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(196,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(196,26.4797929667217)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(203,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.351889788091304)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(203,27.5367714833609)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,2.11133872854782)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(210,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(210,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(217,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(217,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(224,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0"><
+ </text>
+ </g>
+ <g transform="translate(224,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(231,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(231,29.53125)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(231,29.0625)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(231,28.125)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(238,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(238,28.8855732416804)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(238,25.5422929667217)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(245,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.050550502377018)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(245,29.6461464833609)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.050550502377018)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(245,29.2922929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.151651507131054)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(245,28.2307324168044)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.151651507131054)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(252,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.187822783742304)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(252,28.6852405138039)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.375645567484608)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(252,26.0557215414116)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.93911391871152)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(259,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.200892857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(259,28.59375)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.200892857142857)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(259,27.1875)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.401785714285714)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(266,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.234375)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(266,28.359375)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.234375)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(266,26.71875)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.234375)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(273,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.200892857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(273,28.59375)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.200892857142857)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(273,27.1875)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.803571428571429)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(280,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,-0.12506710357733)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(280,30.8754697250413)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,-0.0833780690515533)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(280,31.4591162084022)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(280,31.7509394500826)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(280,32.0427626917631)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(287,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(287,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(294,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.050550502377018)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(294,29.6461464833609)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.050550502377018)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(294,29.2922929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.151651507131054)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(294,28.2307324168044)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.151651507131054)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(301,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.187822783742304)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(301,28.6852405138039)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.375645567484608)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(301,26.0557215414116)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.93911391871152)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(308,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(308,28.8855732416804)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(308,25.5422929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(315,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(315,28.8855732416804)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(315,25.5422929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(322,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.025275251188509)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(322,29.8230732416804)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0505505023770181)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(322,29.4692197250413)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0505505023770181)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(322,29.1153662084022)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0758257535655271)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(329,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(329,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(329,26.4797929667217)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(336,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(336,28.8855732416804)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(336,25.5422929667217)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(343,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(343,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(350,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(350,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(357,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(357,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(364,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(364,27.2691833309992)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(371,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(371,27.2691833309992)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(378,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(378,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(385,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(385,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(392,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(392,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(399,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(399,27.2691833309992)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(406,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(406,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(413,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(413,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(420,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(420,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(427,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(427,28.2398964833609)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(427,26.4797929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(434,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(434,27.2691833309992)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(441,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(441,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(448,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(448,27.2691833309992)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(455,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(455,27.2691833309992)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(462,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(462,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(469,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(469,28.2398964833609)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(469,26.4797929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(476,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(476,27.2691833309992)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(483,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.390116667000108)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(483,27.2691833309992)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.73081666900076)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(490,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(490,28.2398964833609)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(490,26.4797929667217)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(497,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(497,28.2398964833609)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(497,26.4797929667217)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(504,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.15920382261708)" x="0" y="0">>
+ </text>
+ </g>
+ <g transform="translate(504,28.8855732416804)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.477611467851241)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(504,25.5422929667217)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.636815290468322)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(511,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.025275251188509)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(511,29.8230732416804)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0505505023770181)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(511,29.4692197250413)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0505505023770181)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(511,29.1153662084022)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0758257535655271)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(518,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(518,29.53125)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(518,29.0625)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(518,28.125)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(525,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(525,29.53125)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0669642857142857)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(525,29.0625)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.133928571428571)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(525,28.125)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.267857142857143)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(532,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,-0.12506710357733)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(532,30.8754697250413)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,-0.0833780690515533)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(532,31.4591162084022)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(532,31.7509394500826)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(532,32.0427626917631)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,-0.0416890345257767)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(539,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(539,28.2398964833609)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.251443359519875)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(539,26.4797929667217)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.50866015711925)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(546,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.120858498028018)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(546,29.1539905138039)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0.120858498028018)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(546,28.3079810276077)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.120858498028018)" x="0" y="0">U
+ </text>
+ </g>
+ <g transform="translate(546,27.4619715414116)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.604292490140091)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(553,30)">
+ <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0)" x="0" y="0">A
+ </text>
+ </g>
+ <g transform="translate(553,30)">
+ <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0)" x="0" y="0">C
+ </text>
+ </g>
+ <g transform="translate(553,30)">
+ <text style="fill: #000000; font-weight: bold" transform="scale(1,0)" x="0" y="0">.
+ </text>
+ </g>
+ <g transform="translate(553,30)">
+ <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0)" x="0" y="0">G
+ </text>
+ </g>
+ <g transform="translate(553,30)">
+ <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0)" x="0" y="0">U
+ </text>
+ </g>
+ <g style="stroke: black; font-size: 8px" transform="translate(-10)">
+ <line x1="0" x2="0" y1="0" y2="30" />
+ <line x1="-5" x2="0" y1="0" y2="0" />
+ <text style="stroke: none" x="-13" y="2">2
+ </text>
+ <line x1="-5" x2="0" y1="15" y2="15" />
+ <text style="stroke: none" x="-13" y="17">1
+ </text>
+ <line x1="-5" x2="0" y1="30" y2="30" />
+ <text style="stroke: none" x="-13" y="32">0
+ </text>
+ </g>
+ <text style="stroke: none" transform="rotate(-90)" x="-26" y="-30">bits
+ </text><!--
+ Generated using the Perl SVG Module V2.33
+ by Ronan Oger
+ Info: http://www.roasp.com/
+ -->
+</svg>
\ No newline at end of file
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Maasha::Biotools;
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: May 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Adds a unique identifier to each record in stream.
+
+Usage: ... | $script [options]
+
+Options: [-k <string> | --key=<string>] - Identifier key - Default=ID
+Options: [-p <string> | --prefix=<string>] - Identifier prefix - Default=ID
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Add identifier to all records.
+Examples: ... | $script -k SEQ_NAME - Change the identifier key to SEQ_NAME.
+Examples: ... | $script -p ID_ - Change identifier prefix from ID00000000 to ID_00000000.
+
+Keys out: <ID> - The specified key.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Align sequences in stream using Muscle.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Aligns all sequences in stream.
+
+Keys in: SEQ_NAME - name of sequence.
+Keys in: Q_ID - used as sequence name if no SEQ_NAME.
+Keys in: SEQ - unaligned sequence.
+
+Keys out: SEQ_NAME - Name of sequence.
+Keys out: SEQ - Aligned sequence.
+Keys out: ALIGN - Number indicating what alignment this sequence belongs to.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Analyze BED entries in the stream.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | $script - Analyzes all BED entries in the stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Analyze the residue composition of each sequence in stream.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | $script - Analyzes all sequences in stream.
+
+Keys out: SEQ_TYPE - Guessed Sequence type.
+Keys out: SEQ_LEN - Sequence length.
+Keys out: RES - Residue count.
+Keys out: RES_SUM - Sum of all non-indel residues.
+Keys out: GC% - GC content in percent for DNA/RNA sequences.
+Keys out: HARD_MASK% - Percentage of sequence hard-masked with N's.
+Keys out: SOFT_MASK% - Percentage of sequence soft-masked with lower case letters.
+Keys out: MIX_INDEX - Sequence composition index: most common residue over the sequence length.
+Keys out: MELT_TEMP - Melting temperature of DNA/RNA sequence: 4 degrees per GC pair, 2 degrees per AT/U pair.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: May 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Analyze sequence tags in the stream from sequence or BED records resulting in a tag length and clone count distribution.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | $script - Analyzes all entries with SEQ in the stream.
+
+Keys in: Q_ID/SEQ_NAME - Identifier with clone/read count: ID00001_123 (123 is the clone count).
+Keys in: SEQ/BED_LEN - Sequence.
+
+Keys out: TAG_LEN - Length of sequence tags.
+Keys out: TAG_COUNT - Number of tags with a given TAG_LEN.
+Keys out: TAG_CLONES - Total clones or reads for all tags (TAG_COUNT) of TAG_LEN.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: January 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Determine type, count, min, max, sum and mean for values in stream.
+
+Usage: ... | analyze_vals [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | analyze_vals -x - Analyzes all values in stream.
+Examples: ... | analyze_vals -x -k CHR_BEG,SCORE - Analyzes selected values in stream.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: September 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: BLAST sequences in stream against a specified database.
+
+Usage: ... | blast_seq [options]
+
+Options: [-d <file> | --database=<file>] - Path to database.
+Options: [-g <genome> | --genome=<dm3|hg18|mm8>] - Choose genome instead of database.
+Options: [-p <program> | --program=<program>] - blastn|blastp|tblastn|blastx|tblastx - Default=guessed!
+Options: [-e <float> | --e_val=<float>] - Expectation value - Default=10
+Options: [-f | --filter] - Filter low complexity sequence - Default=OFF
+Options: [-F | --no_filter] - Disable low complexity filter - Default
+Options: [-c <int> | --cpus=<int>] - Number of CPUs to use - Default=1
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | blast_seq -d my_database - BLAST sequences in stream against custom database.
+Examples: ... | blast_seq -g hg18 - BLAST sequences in stream against hg18.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: BLAT sequences in stream against a specified genome.
+
+Usage: ... | $script [options] -g <genome>
+
+Options: [-g <genome> | --genome=<genome>] - BLAT against genome.
+Options: [-c | --ooc] - Use overused tile file (faster, but less sensitive).
+Options: [-t <int> | --tile_size=<int>] - Size of match that triggers an alignment - Default=11
+Options: [-s <int> | --step_size=<int>] - Spacing between tiles - Default=tile_size
+Options: [-m <int> | --min_identity=<int>] - Minimum sequence identity in percent - Default=90
+Options: [-M <int> | --min_score=<int>] - Minimum score - Default=0
+Options: [-o <int> | --one_off=<int>] - Allows one mismatch in tile - Default=0
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -g hg18 - BLAT sequences in stream against hg18.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: March 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Calculate the bit score for each position based on an alignment in the stream.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Output a bitscore for each column in the alignment.
+
+Keys out: V0, V1, V2, Vn - Bit score for each position.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Complement sequences in stream.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Complements all sequences in stream.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: May 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Generates an index calculated as the most common di-residue over the sequence length for all sequences in stream. An index higher than 0.4 indicates low complexity sequence.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | $script - Analyzes all sequences in stream.
+
+Keys out: SEQ_COMPLEXITY - Calculated complexity index.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Performs computations on records in stream.
+
+Usage: ... | $script [options]
+
+Options: [-e <string> | --eval=<string>] - Evaluate expression.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | $script -e 'CHR_BEG = 50' - Set CHR_BEG to 50 in all records.
+Examples: ... | $script -e 'CHR_BEG = CHR_BEG - 50' - Subtracts 50 from CHR_BEG in all records.
+Examples: ... | $script -e 'CHR_END = CHR_END + BED_LEN' - Adds BED_LEN to CHR_END in all records.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Count the number of records in stream.
+
+Usage: ... | count_records [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | count_records -x -o count.txt - Count records in stream and write result to 'count.txt'
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Count sequences in stream.
+
+Usage: ... | count_seq [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | count_seq -x -o count.txt - Output records count to 'count.txt'.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Count the number of times values of given keys exists in stream.
+
+Usage: ... | count_vals [options]
+
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | count_vals -k SEQ - Count occurrence of each SEQ in stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: September 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Create a BLAST database from sequences in stream for use with BLAST.
+
+Usage: ... | create_blast_db [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-d <file> | --database=<file>] - Path and name of database to create.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | create_blast_db -x -d /tmp/fly
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: January 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Create a vmatch index using mkvtree from sequences in stream for use with vmatch.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-i <dir> | --index_name=<dir>] - Directory name to contain index files.
+Options: [-p <int> | --prefix_length=<int>] - Minimum prefix that can be matched - Default=guessed
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -i /tmp/fly
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Create a weight matrix of the residue composition of an alignment in the stream.
+
+Usage: ... | $script [options]
+
+Options: [-p | --percent] - Output the result in percent - Default=absolute
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -p - Creates a weight matrix in percent.
+
+Keys out: V0, V1, V2, Vn - Weight for each position.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Extract subsequence from sequences in stream.
+
+Usage: ... | extract_seq [options]
+
+Options: [-b <int> | --beg=<int>] - Begin position of subsequence (first residue=1)
+Options: [-e <int> | --end=<int>] - End position of subsequence
+Options: [-l <int> | --len=<int>] - Length of subsequence
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | extract_seq -b 1 -e 10 - Get the first 10 nucleotides of all sequences.
+Examples: ... | extract_seq -b 1 -l 10 - Get the first 10 nucleotides of all sequences.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: June 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Flip table records so rows become columns and vice versa.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Flip rows and columns.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Folds sequences in stream.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Folds all sequences in stream.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: April 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Extract alignment from multiple genome alignment either explicitly or using BED/PSL/BLAST entries in stream.
+
+Usage: $script [options] -g <genome>
+Usage: ... | $script [options] -g <genome>
+
+Options: [-g <genome> | --genome=<genome>] - Genome to get alignment from.
+Options: [-c <string> | --chr=<string>] - Chromosome with requested alignment.
+Options: [-b <int> | --beg=<int>] - Begin position of alignment (first residue=1).
+Options: [-e <int> | --end=<int>] - End position of alignment.
+Options: [-l <int> | --len=<int>] - Length of alignment.
+Options: [-s <string> | --strand=<string>] - Strand - Default=+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: $script -g hg18 -c chr1 -b 1 -e 10 - Get the first 10 nucleotides multiz alignment of human genome chr1.
+Examples: $script -g hg18 -c chr1 -b 1 -l 10 - Get the first 10 nucleotides multiz alignment of human genome chr1.
+Examples: ... | $script -g mm8 -s '-' - Get the reverse complement alignment of mouse BED/PSL/BLAST entries.
+
+Keys in: REC_TYPE - Optional record type (BED, PSL, or BLAST).
+Keys in: CHR - Chromosome (for use with BED record type).
+Keys in: CHR_BEG - Chromosome begin.
+Keys in: CHR_END - Chromosome end.
+Keys in: S_ID - Chromosome (for use with PSL and BLAST record type).
+Keys in: S_BEG - Chromosome begin (for use with PSL and BLAST record type).
+Keys in: S_END - Chromosome end (for use with PSL and BLAST record type).
+Keys in: STRAND - Sequence strand.
+
+Keys out: ALIGN - Alignment number that this entry belongs to.
+Keys out: CHR - Chromosome.
+Keys out: CHR_BEG - Chromosome begin.
+Keys out: CHR_END - Chromosome end.
+Keys out: STRAND - Strand.
+Keys out: SEQ - Sequence.
+Keys out: ALIGN_LEN - Sequence length.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2009
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Extract phastcons scores from a genome either explicitly or using BED/PSL/BLAST entries in stream.
+
+Usage: $script [options] -g <genome>
+Usage: ... | $script [options] -g <genome>
+
+Options: [-g <genome> | --genome=<genome>] - Genome to get phastcons scores from.
+Options: [-c <string> | --chr=<string>] - Chromosome with requested scores.
+Options: [-b <int> | --beg=<int>] - Begin position of interval (first residue=1).
+Options: [-e <int> | --end=<int>] - End position of interval.
+Options: [-l <int> | --len=<int>] - Length of interval.
+Options: [-f <int> | --flank=<int>] - Include flanking region.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: $script -g hg18 -c chr1 -b 1 -e 10 - Get the first 10 phastcons scores from human genome chr1.
+Examples: $script -g hg18 -c chr1 -b 1 -l 10 - Get the first 10 phastcons scores from human genome chr1.
+Examples: ... | $script -g mm8 -f 50 - Get phastcons scores including 50nt flanks of mouse BED/PSL/BLAST entries.
+
+Keys in: REC_TYPE - Optional record type (BED, PSL, or BLAST).
+Keys in: CHR - Chromosome (for use with BED record type).
+Keys in: CHR_BEG - Chromosome begin.
+Keys in: CHR_END - Chromosome end.
+Keys in: S_ID - Chromosome (for use with PSL and BLAST record type).
+Keys in: S_BEG - Chromosome begin (for use with PSL and BLAST record type).
+Keys in: S_END - Chromosome end (for use with PSL and BLAST record type).
+
+Keys out: CHR - Chromosome.
+Keys out: CHR_BEG - Chromosome begin.
+Keys out: CHR_END - Chromosome end.
+Keys out: PHASTCONS - Comma separated list of phastcons scores.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: December 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Extract subsequence from genome sequence either explicitly or using BED/PSL/BLAST entries in stream.
+
+Usage: $script [options] -g <genome>
+Usage: ... | $script [options] -g <genome>
+
+Options: [-g <genome> | --genome=<genome>] - Genome to get subsequence from.
+Options: [-c <string> | --chr=<string>] - Chromosome with requested subsequence.
+Options: [-b <int> | --beg=<int>] - Begin position of subsequence (first residue=1).
+Options: [-e <int> | --end=<int>] - End position of subsequence.
+Options: [-l <int> | --len=<int>] - Length of subsequence.
+Options: [-f <int> | --flank=<int>] - Include flanking sequence.
+Options: [-m | --mask] - Softmask non-exonic sequence.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: $script -g hg18 -c chr1 -b 1 -e 10 - Get the first 10 nucleotides of human genome chr1.
+Examples: $script -g hg18 -c chr1 -b 1 -l 10 - Get the first 10 nucleotides of human genome chr1.
+Examples: ... | $script -g mm8 -f 50 - Get subsequences including 50nt flanks of mouse BED/PSL/BLAST entries.
+
+Keys in: REC_TYPE - Optional record type (BED, PSL, or BLAST).
+Keys in: CHR - Chromosome (for use with BED record type).
+Keys in: CHR_BEG - Chromosome begin.
+Keys in: CHR_END - Chromosome end.
+Keys in: S_ID - Chromosome (for use with PSL and BLAST record type).
+Keys in: S_BEG - Chromosome begin (for use with PSL and BLAST record type).
+Keys in: S_END - Chromosome end (for use with PSL and BLAST record type).
+Keys in: STRAND - Sequence strand.
+
+Keys out: CHR - Chromosome.
+Keys out: CHR_BEG - Chromosome begin.
+Keys out: CHR_END - Chromosome end.
+Keys out: SEQ - Sequence.
+Keys out: SEQ_LEN - Sequence length.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Grab records in stream.
+
+Usage: ... | $script [options]
+
+Options: [-p <string> | --patterns=<string>] - Grab using comma separated list of patterns.
+Options: [-P <file> | --patterns_in=<file>] - Grab using patterns from file - one pattern per line.
+Options: [-r <string> | --regex=<string>] - Grab using Perl regex.
+Options: [-e <string> | --eval=<string>] - Grab 'key,operator,value'. Operators: '>,<,>=,<=,=,!=,eq,ne'.
+Options: [-E <file> | --exact_in=<file>] - Grab using exact expressions from file - one expression per line.
+Options: [-i | --invert] - Display non-matching results.
+Options: [-c | --case_insensitive] - Turn regex matching case insensitive.
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to grab the value for.
+Options: [-K | --keys_only] - Only grab for keys.
+Options: [-V | --vals_only] - Only grab for vals.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -p SEQ - Grab all records matching SEQ in keys or vals.
+Examples: ... | $script -p 110 -k SEQ_LEN - Grab all records where SEQ_LEN matches 110.
+Examples: ... | $script -p 110 -k SEQ_LEN -i - Grab all records where SEQ_LEN is not 110.
+Examples: ... | $script -r 'A[TUG]C' - Grab all records matching ATC, AUC, or AGC.
+Examples: ... | $script -p SEQ -K - Grab all records with a key matching SEQ.
+Examples: ... | $script -p SEQ -V - Grab all records with a value matching SEQ.
+Examples: ... | $script -e 'SEQ_LEN<30' - Grab all records with a SEQ_LEN less than 30.
+Examples: ... | $script -e 'OS eq D.mel' - Grab all records with OS equal to 'D.mel'.
+Examples: ... | $script -E names.txt - Grab all records with exact match to names in file.
+Examples: ... | $script -E seq.txt -i - Grab all records with no exact match to sequences in file.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Emit the first records in stream.
+
+Usage: ... | head_records
+
+Options: [-n <int> | --num=<int>] - Number of records to emit - Default=10
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | head_records -n 40 - Emit the 40 first records from the stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Inverts an alignment showing only non-matching residues using the first sequence as reference.
+
+Usage: ... | $script [options]
+
+Options: [-s | --soft] - Use soft inversion instead of hard inversion.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script - Invert alignment in stream.
+Examples: ... | $script -s - Soft invert alignment in stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Determines the length of each sequence in stream - and a total length.
+
+Usage: ... | length_seq [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | length_seq -x -o length.txt - Output total length to 'length.txt'.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Determine the length of the value for given keys.
+
+Usage: ... | length_vals [options]
+
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | length_vals -k SEQ - Determine the length of all SEQ values.
+Examples: ... | length_vals -k HIT,PATTERN - Determine the length of all HIT and PATTERN values.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: January 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: List the description of all biotools.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Find all matches between the first two sequences in stream - or all self-self matches if only one sequence is found.
+
+Usage: ... | match_seq [options]
+
+Options: [-w <int> | --word_size=<int>] - Minimum match size - Default=20
+Options: [-d <string> | --direction=<string>] - Match direction: both|forward|reverse - Default=both
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | match_seq -w 25 -d forward
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Find the maximum value in the stream for given keys.
+
+Usage: ... | $script [options]
+
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to inspect.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: ... | $script -k SEQ_LEN - Find the maximum SEQ_LEN.
+
+Keys out: <key>_MAX - Maximum value of <key>.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Calculate the mean of values of given keys.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -k SEQ_LEN,HIT_LEN -o result.txt - Calculate mean values and save to 'result.txt'.
+
+Keys out: <key>_MEAN - Mean value of <key>.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: March 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Calculate the median values of given keys.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -k SEQ_LEN,HIT_LEN -o result.txt - Calculate median values and save to 'result.txt'.
+
+Keys out: <key>_MEDIAN - Median value of <key>.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Merge values of keys in a record so that all values are joined with a delimiter and saved as value of the first key.
+
+Usage: ... | $script [options]
+
+Options: [-k <string> | --keys=<string>] - List of values to merge.
+Options: [-d <string> | --delimit=<string>] - Merge delimiter - Default='_'
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -k SEQ_NAME,CHR,CHR_BEG,CHR_END - Merges specified values and saves in SEQ_NAME.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Find the minimum value in the stream for given keys.
+
+Usage: ... | $script [options]
+
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to inspect.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: ... | $script -k SEQ_LEN - Find the minimum SEQ_LEN.
+
+Keys out: <key>_MIN - Minimum value of <key>.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Determines the total oligo frequencies of sequences in stream.
+
+Usage: ... | oligo_freq [options]
+
+Options: [-w <int> | --word_size=<int>] - Size of oligos - Default=7.
+Options: [-a | --all] - Accumulate oligos for all sequences in stream.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | oligo_freq -w 5 - Determines oligo frequency for all sequences in stream.
+Examples: ... | oligo_freq -w 5 -a - Determines accumulated oligo frequency for all sequences in stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Scan sequences in stream or genomes for patterns using scan_for_matches.
+
+Usage: ... | $script [options]
+Usage: $script [options] -g <genome>
+
+Options: [-p <string> | --patterns=<string>] - Comma separated list of patterns to scan for.
+Options: [-P <file> | --patterns_in=<file>] - File with one pattern per line.
+Options: [-c | --comp] - Scan complementary strand as well.
+Options: [-h <int> | --max_hits=<int>] - Stop scanning after max hits.
+Options: [-m <int> | --max_misses=<int>] - Stop scanning after max misses.
+Options: [-g <genome> | --genome=<genome>] - Scan genome for pattern(s).
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -p AATTAA,ATTTAA - Scan sequences in stream for given patterns.
+Examples: $script -g hg18 -P file.pat - Scan hg18 for patterns in 'file.pat'.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Plot chromosome distribution of hits from e.g. BLAT or Vmatch.
+
+Usage: ... | plot_chrdist [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-t <string> | --terminal=<string>] - Terminal for output: dumb|post|svg - Default=dumb
+Options: [-T <string> | --title=<string>] - Set plot title.
+Options: [-X <string> | --xlabel=<string>] - Set x-axis label.
+Options: [-Y <string> | --ylabel=<string>] - Set y-axis label.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_chrdist -x - Create plot and output to STDOUT in ASCII.
+Examples: ... | plot_chrdist -x -t svg -o plot.svg - Create plot and save to 'plot.svg'.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: September 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Plot generic histogram.
+
+Usage: ... | plot_histogram [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-k <string> | --key=<string>] - Key to use for plotting.
+Options: [-t <string> | --terminal=<string>] - Terminal for output: dumb|post|svg - Default=dumb
+Options: [-T <string> | --title=<string>] - Set plot title.
+Options: [-X <string> | --xlabel=<string>] - Set x-axis label.
+Options: [-Y <string> | --ylabel=<string>] - Set y-axis label.
+Options: [-s <num|alph> | --sort=<num|alph>] - Sort criteria for x-axis keys - Default=num
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_histogram -x -k TISSUE - Create plot and output to STDOUT in ASCII.
+Examples: ... | plot_histogram -x -k TISSUE -t svg -o plot.svg - Create plot and save to 'plot.svg'.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Plot hits on a karyogram for a given genome.
+
+Usage: ... | plot_karyogram [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-g <genome> | --genome=<genome>] - Genome layout of karyogram - Default=hg18
+Options: [-f <string> | --feat_color=<string>] - Color of features - Default=black
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_karyogram -x -g mm8 -o plot.svg - Create plot and save to 'plot.svg'.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Plot length distribution.
+
+Usage: ... | plot_lendist [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-k <string> | --key=<string>] - Key to use for plotting.
+Options: [-t <string> | --terminal=<string>] - Terminal for output: dumb|post|svg - Default=dumb
+Options: [-T <string> | --title=<string>] - Set plot title.
+Options: [-X <string> | --xlabel=<string>] - Set x-axis label.
+Options: [-Y <string> | --ylabel=<string>] - Set y-axis label.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_lendist -x -k HIT_LEN - Create plot and output to STDOUT in ASCII.
+Examples: ... | plot_lendist -x -k HIT_LEN -t svg -o plot.svg - Create plot and save to 'plot.svg'.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Generate a dotplot of matches in stream.
+
+Usage: ... | plot_matches [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-t <string> | --terminal=<string>] - Terminal for output: dumb|post|svg - Default=dumb
+Options: [-d <string> | --direction=<string>] - Direction of matches to plot: both|forward|reverse - Default=both
+Options: [-T <string> | --title=<string>] - Set plot title.
+Options: [-X <string> | --xlabel=<string>] - Set x-axis label.
+Options: [-Y <string> | --ylabel=<string>] - Set y-axis label.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_matches -x -d forward -t svg -o plot.svg - Create plot and save to 'plot.svg'.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: January 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Generate a plot of PhastCons profiles based on chromosome coordinates in stream.
+
+Usage: ... | plot_phastcons_profiles [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-g <genome> | --genome=<genome>] - Genome from which to obtain PhastCons info.
+Options: [-m | --mean] - Calculate a mean profile.
+Options: [-M | --median] - Calculate a median profile.
+Options: [-f <int> | --flank=<int>] - Include flanking PhastCons scores.
+Options: [-t <string> | --terminal=<string>] - Terminal for output: dumb|post|svg - Default=dumb
+Options: [-d <string> | --direction=<string>] - Direction of matches to plot: both|forward|reverse - Default=both
+Options: [-T <string> | --title=<string>] - Set plot title.
+Options: [-X <string> | --xlabel=<string>] - Set x-axis label.
+Options: [-Y <string> | --ylabel=<string>] - Set y-axis label.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_phastcons_profiles -x -g dm3 -f 50 - Create plot of all profiles including 50 flanking scores.
+Examples: ... | plot_phastcons_profiles -x -g dm3 -f 50 -m - Create a mean plot of all profiles including 50 flanking scores.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Renders a sequence logo in SVG format from alignment in stream.
+
+Usage: ... | plot_seqlogo [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | plot_seqlogo -x -o logo.svg - Create plot and save to 'logo.svg'.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Date: January 2008
+
+Description: Prints biotools usage.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: December 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Select a number of random records from the stream.
+
+Usage: ... | random_records [options]
+
+Options: [-n <int> | --num=<int>] - Number of random records to select - Default=10
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | random_records -n 100 - Output 100 random records to stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: March 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read sequence entries from one or more 2bit files. The length of each sequence is also determined.
+
+Usage: $script [options] -i <2bit file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files or glob expression to read.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-N | --no_mask] - Ignore soft masking.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.2bit - Read sequence entries from 2bit file.
+Examples: $script -i test1.2bit,test2.2bit - Read sequence entries from 2bit files.
+Examples: $script -i '*.2bit' - Read sequence entries from 2bit files.
+Examples: $script -i test.2bit -n 10 - Read first 10 sequence entries from 2bit file.
+
+Keys out: SEQ_NAME - Name of sequence.
+Keys out: SEQ - Sequence.
+Keys out: SEQ_LEN - Length of sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read aligned FASTA entries from one or more files. The aligned sequence length is also determined.
+
+Usage: $script [options] -i <FASTA file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files to read.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.fna - Read aligned FASTA entries from file.
+Examples: $script -i test1.fna,test2.fna - Read aligned FASTA entries from files.
+Examples: $script -i '*.fna' - Read aligned FASTA entries from files.
+Examples: $script -i test.fna -n 10 - Read first 10 aligned FASTA entries from file.
+
+Keys out: SEQ_NAME - Name of sequence.
+Keys out: SEQ - Sequence.
+Keys out: ALIGN_LEN - Length of aligned sequence.
+Keys out: ALIGN - Number indicating what alignment this sequence belongs to.
+
+
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read BED data (Browser Extensible Data).
+
+Usage: read_bed [options] -i <BED file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read input data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: read_bed -i test1.bed,test2.bed
+Examples: read_bed -i '*.bed'
+
+Keys out: CHR - Chromosome name.
+Keys out: CHR_BEG - Chromosome begin position.
+Keys out: CHR_END - Chromosome end position.
+Keys out: Q_ID - Query ID (feature name).
+Keys out: SCORE - Score.
+Keys out: STRAND - Strand.
+Keys out: THICK_BEG - The starting position at which the feature is drawn thickly.
+Keys out: THICK_END - The ending position at which the feature is drawn thickly.
+Keys out: ITEMRGB - An RGB value of the form R,G,B (e.g. 255,0,0).
+Keys out: BLOCKCOUNT - The number of blocks (exons) in the BED entry.
+Keys out: BLOCKSIZES - A comma separated list of the block sizes.
+Keys out: Q_BEGS - A comma separated list of block starts.
+Keys out: REC_TYPE - Record type.
+Keys out: BED_LEN - Length of BED entry.
+Keys out: BED_COLS - Number of columns in BED line.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read tabular BLAST output (-m 8 and -m 9).
+
+Usage: $script [options] -i <BLAST tabular file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read input data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script --data_in=test.blast
+
+Keys out: Q_ID - Query ID.
+Keys out: S_ID - Subject ID.
+Keys out: IDENT - Identity (%).
+Keys out: ALIGN_LEN - Alignment length.
+Keys out: MISMATCHES - Number of mismatches.
+Keys out: GAPS - Number of gaps.
+Keys out: Q_BEG - Query begin.
+Keys out: Q_END - Query end.
+Keys out: S_BEG - Subject begin.
+Keys out: S_END - Subject end.
+Keys out: E_VAL - Expect value.
+Keys out: BIT_SCORE - Bit score.
+Keys out: STRAND - Strand.
+Keys out: REC_TYPE - Record type.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: September 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read data in EMBL format.
+
+Usage: $script [options] -i <EMBL file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read input data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-k <string> | --keys=<string>] - Match a subset of record keys only.
+Options: [-f <string> | --feats=<string>] - Match a subset of features only.
+Options: [-q <string> | --quals=<string>] - Match a subset of qualifiers only.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i embl.dat - Read keys, features, and qualifiers.
+Examples: $script -i embl.dat -k AC,DE - Read only Accession number and Description.
+Examples: $script -i embl.dat -k FT,SEQ -f CDS - Read subset of features matching CDS.
+Examples: $script -i embl.dat -k FT,SEQ -f CDS -q gene - Read subset of qualifiers matching gene.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read FASTA entries from one or more files. The length of each sequence is also determined.
+
+Usage: $script [options] -i <FASTA file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files or glob expression to read.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.fna - Read FASTA entries from file.
+Examples: $script -i test1.fna,test2.fna - Read FASTA entries from files.
+Examples: $script -i '*.fna' - Read FASTA entries from files.
+Examples: $script -i test.fna -n 10 - Read first 10 FASTA entries from file.
+
+Keys out: SEQ_NAME - Name of sequence.
+Keys out: SEQ - Sequence.
+Keys out: SEQ_LEN - Length of sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read Generic Feature Format (GFF v.3) from one or more files.
+
+Usage: $script [options] -i <GFF file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files or glob expression to read.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.gff - Read GFF entries from file.
+
+Keys out:
+
+Keys out: Q_ID - Feature ID.
+Keys out: SOURCE - Feature source.
+Keys out: TYPE - Feature type.
+Keys out: Q_BEG - Begin position
+Keys out: Q_END - End position
+Keys out: SCORE - Score.
+Keys out: STRAND - Strand.
+Keys out: PHASE - Phase.
+Keys out: ATT - Attributes.
+Keys out: ATT_<key> - Breakdown of Attributes into key/value pairs
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: May 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read records from a MySQL query.
+
+Usage: $script [options]
+
+Options: [-d <string> | --database=<string>] - MySQL database.
+Options: [-q <string> | --query=<string>] - MySQL query.
+Options: [-u <string> | --user=<string>] - MySQL user name - Default=<fetched from .hg.conf>
+Options: [-p <string> | --password=<string>] - MySQL password - Default=<fetched from .hg.conf>
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -d dm3 -q 'SHOW TABLES' - Retrieve table information from database.
+Examples: $script -d dm3 -q 'SELECT * FROM estOrientInfo' - Retrieve entire estOrientInfo.
+Examples: $script -d dm3 -q 'SELECT * FROM table WHERE Score < 100' - Retrieve selected lines from table.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: December 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read data in PhastCons format which are included in the stream as BED records.
+
+Usage: $script [options] -i <PhastCons file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read input data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read and output.
+Options: [-m <int> | --min=<int>] - Minimum length of a conserved block - Default=10
+Options: [-d <int> | --dist=<int>] - Maximum distance between conserved blocks - Default=25
+Options: [-t <float> | --threshold=<float>] - Threshold for conserved block - Default=0.8
+Options: [-g <int> | --gap=<int>] - Allow micro-gap in a conserved block - Default=5
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i chr4.pp
+
+Keys out: CHR - Chromosome name.
+Keys out: CHR_BEG - Chromosome begin position.
+Keys out: CHR_END - Chromosome end position.
+Keys out: Q_ID - Query ID (feature name).
+Keys out: SCORE - Score.
+Keys out: STRAND - Strand.
+Keys out: THICK_BEG - The starting position at which the feature is drawn thickly.
+Keys out: THICK_END - The ending position at which the feature is drawn thickly.
+Keys out: ITEMRGB - An RGB value of the form R,G,B (e.g. 255,0,0).
+Keys out: BLOCKCOUNT - The number of blocks (exons) in the BED entry.
+Keys out: BLOCKSIZES - A comma separated list of the block sizes.
+Keys out: Q_BEGS - A comma separated list of block starts.
+Keys out: REC_TYPE - Record type.
+Keys out: BED_LEN - Length of BED entry.
+Keys out: BED_COLS - Number of columns in BED line.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read PSL data (BLAT's default output).
+
+Usage: $script [options] -i <PSL file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read PSL data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test1.psl,test2.psl
+Examples: $script -i '*.psl'
+
+Keys out: MATCHES - Number of non-repeat matches.
+Keys out: MISMATCHES - Number of mismatches.
+Keys out: REPMATCHES - Number of repeat matches.
+Keys out: NCOUNT - Number of Ns.
+Keys out: QNUMINSERT - Number of inserts in query.
+Keys out: QBASEINSERT - Number of bases inserted in query.
+Keys out: SNUMINSERT - Number of inserts in subject.
+Keys out: SBASEINSERT - Number of bases inserted in subject.
+Keys out: STRAND - Strand.
+Keys out: Q_ID - Query ID.
+Keys out: Q_LEN - Query length.
+Keys out: Q_BEG - Query begin.
+Keys out: Q_END - Query end.
+Keys out: S_ID - Subject ID.
+Keys out: S_LEN - Subject length.
+Keys out: S_BEG - Subject begin.
+Keys out: S_END - Subject end.
+Keys out: BLOCKCOUNT - Block count.
+Keys out: BLOCKSIZES - Block sizes.
+Keys out: Q_BEGS - Query sequence blocks begins.
+Keys out: S_BEGS - Subject sequence blocks begins.
+Keys out: SCORE - Score calculated as in web BLAT results.
+Keys out: REC_TYPE - Record type.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: January 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read data in GEO soft format. NCBI's deep sequencing and micro array data format.
+
+Usage: $script [options] -i <soft file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read input data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read and output.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i GSE6734_family.soft
+
+Keys out: SAMPLE_TITLE - Title of sample.
+Keys out: SEQ_NAME - Sequence name composed of Platform Series ID, Sample GEO accession, Sequence number in current experiment, and read count.
+Keys out: SEQ - Sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: April 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read Solexa deep sequencing output files. Lowercase sequence indicates low quality.
+
+Usage: $script [options] -i <Solexa file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files or glob expression to read.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-q <int> | --quality=<int>] - Lowercase nucleotide with quality score below this limit (min:0 max:40) - Default=20
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.solexa - Read Solexa entries from file.
+Examples: $script -i test1.solexa,test2.solexa - Read Solexa entries from files.
+Examples: $script -i '*.solexa' - Read Solexa entries from files.
+Examples: $script -i test.solexa -n 10 - Read first 10 Solexa entries from file.
+Examples: $script -i test.solexa -q 10 - Change quality score threshold to 10.
+
+Keys out: SEQ_NAME - Name of sequence.
+Keys out: SEQ - Sequence.
+Keys out: SEQ_LEN - Length of sequence.
+Keys out: SCORE_MEAN - Mean quality score.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: April 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read Solid sequence files with Name, Sequence and Quality.
+
+Usage: $script [options] -i <Solid file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Comma separated list of files or glob expression to read.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-q <int> | --quality=<int>] - Lowercase nucleotide with quality score below this limit (min:0 max:40) - Default=20
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.solid - Read Solid entries from file.
+Examples: $script -i test1.solid,test2.solid - Read Solid entries from files.
+Examples: $script -i '*.solid' - Read Solid entries from files.
+Examples: $script -i test.solid -n 10 - Read first 10 Solid entries from file.
+Examples: $script -i test.solid -q 10 - Change quality score threshold to 10.
+
+Keys out: SEQ_NAME - Name of sequence.
+Keys out: SEQ_CS - Sequence in color space.
+Keys out: SEQ_QUAL - Sequence quality.
+Keys out: SEQ - Sequence.
+Keys out: SEQ_LEN - Length of sequence.
+Keys out: SCORE_MEAN - Mean quality score.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: September 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read data in Stockholm format.
+
+Usage: $script [options] -i <Stockholm file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read input data from file.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.stockholm
+
+Keys out: SEQ_NAME - Sequence name.
+Keys out: SEQ - Aligned sequence.
+Keys out: ALIGN - Number indicating what alignment this sequence belongs to.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Read a table or selected table columns.
+
+Usage: $script [options] -i <tabular file(s)>
+
+Options: [-i <file(s)> | --data_in=<file(s)>] - Read tabular data from file.
+Options: [-d <regex> | --delimit=<regex>] - Changes delimiter - Default='\s+'
+Options: [-c <string> | --cols=<string>] - Comma separated list of cols to read in that order.
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to use for each column.
+Options: [-s <int> | --skip=<int>] - Skip number of initial records.
+Options: [-n <int> | --num=<int>] - Limit number of records to read.
+Options: [-I <file> | --stream_in=<file>] - Read input stream from file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output stream to file - Default=STDOUT
+
+Examples: $script -i test.tab -d ',' -c 7,4 -k SEQ,SEQ_NAME
+
+Keys out: V0, V1, V2 ... Vn - Default column names.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Remove indels (-~.) from sequences in stream.
+
+Usage: ... | remove_indels [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | remove_indels - Removes indels from all sequences in stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Remove given keys from records in stream.
+
+Usage: ... | remove_keys [options]
+
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to remove.
+Options: [-K <string> | --save_keys=<string>] - Remove all keys except these comma separated keys.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | remove_keys -k 'SEQ_NAME,SEQ' - Removes SEQ_NAME and SEQ from all records in stream.
+Examples: ... | remove_keys -K 'SEQ_NAME,SEQ' - Removes all keys except SEQ_NAME and SEQ.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Rename keys in stream.
+
+Usage: ... | rename_keys [options]
+
+Options: [-k <search,replace> | --keys=<search,replace>] - Keys to find and replace.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | rename_keys -k PATTERN_LEN,HIT_LEN - Renames PATTERN_LEN key to HIT_LEN.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Reverse sequences in stream.
+
+Usage: ... | reverse_seq [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | reverse_seq - Reverses all sequences in stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: December 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Shuffle sequences in stream.
+
+Usage: ... | $script [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to file - Default=STDOUT
+
+Examples: ... | $script - Shuffles all sequences in stream.
+
+Keys in: SEQ - Sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: December 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Sort records in the stream.
+
+Usage: ... | sort_records [options]
+
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to sort by. Append n for numeric sorting instead of alphabetic.
+Options: [-r | --reverse] - Reverse sort order.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | sort_records -k SEQ - Output records sorted alphabetically according to SEQ.
+Examples: ... | sort_records -k SEQ_LENn - Output records sorted numerically according to SEQ_LEN.
+Examples: ... | sort_records -k SEQ_LENn,SEQ -r - Output records in reverse order sorted according to SEQ_LEN and SEQ.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Split BED records into overlapping windows.
+
+Usage: ... | $script [options]
+
+Options: [-w <int> | --window_size=<int>] - Window size - Default=20
+Options: [-s <int> | --step_size=<int>] - Step size - Default=1
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -w 12 - Split BED record into windows of size 12.
+Examples: ... | $script -s 5 - Split BED record using windows overlapping every 5th nucleotide.
+
+Keys in: CHR - Chromosome.
+Keys in: CHR_BEG - Chromosome begin position.
+Keys in: CHR_END - Chromosome end position.
+
+Keys out: REC_TYPE - Record type (BED).
+Keys out: CHR - Chromosome.
+Keys out: CHR_BEG - Chromosome begin position.
+Keys out: CHR_END - Chromosome end position.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Split sequences in stream into overlapping oligos.
+
+Usage: ... | $script [options]
+
+Options: [-w <int> | --word_size=<int>] - Word size of oligos - Default=7
+Options: [-u | --uniq] - Only emit unique oligos.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -w 12 -u - Only emit unique oligos of size 12.
+
+Keys in: SEQ_NAME - Sequence name.
+Keys in: SEQ - Sequence.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Calculate the total sums for the values of given keys.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -k SEQ_LEN,HIT_LEN -o result.txt - Sum values and save to 'result.txt'.
+
+Keys out: <key>_SUM - Sum of values of <key>
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: February 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Using the first sequence in stream as reference, tile all subsequent sequences based on pairwise alignments.
+
+Usage: ... | $script [options]
+
+Options: [-i <int> | --identity=<int>] - Minimum identity (%) for pairwise alignment - Default=70
+Options: [-s | --supress_indels] - Suppress insertions in query sequence.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -i 75 - Tile all sequences in stream that have a similarity higher than 75%.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Translate DNA sequence into protein sequence.
+
+Usage: ... | $script [options]
+
+Options: [-f <string> | --frames=<string>] - Comma separated list of frames of translation: 1,2,3,-1,-2,-3 - Default=all
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -f 1 - Translate into first forward reading frame.
+Examples: ... | $script -f "-1,-2,-3" - Translate into all reverse reading frames.
+
+Keys in: SEQ - Sequence.
+
+Keys out: FRAME - Frame of translation.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Transliterate chars from sequences in stream.
+
+Usage: ... | $script [options]
+
+Options: [-s <chars> | --search=<chars>] - String of chars to locate and replace
+Options: [-r <chars> | --replace=<chars>] - String of chars for replacing
+Options: [-d <chars> | --delete=<chars>] - String of chars to delete
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -s Uu -r Tt - Replacing u with t as in converting RNA to DNA.
+Examples: ... | $script -d '.~-' - Removing indels.
+
+Keys in: SEQ - Sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: April 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Transliterate chars from values in stream.
+
+Usage: ... | $script [options]
+
+Options: [-k <string> | --keys=<string>] - List of values to transliterate
+Options: [-s <chars> | --search=<chars>] - String of chars to locate and replace
+Options: [-r <chars> | --replace=<chars>] - String of chars for replacing
+Options: [-d <chars> | --delete=<chars>] - String of chars to delete
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -k SEQ -s Uu -r Tt - Replacing u with t in SEQ (i.e. converting RNA to DNA).
+Examples: ... | $script -k PATTERN -d '.~-' - Removing indels from PATTERN.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Locate records in stream where the value for a given key is unique or non-unique.
+
+Usage: ... | uniq_vals [options]
+
+Options: [-k <string> | --key=<string>] - Key for which the value is checked for uniqueness.
+Options: [-i | --invert] - Display non-unique records.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | uniq_vals -k SEQ_NAME - Locate records with unique SEQ_NAME value.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: September 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Upload data to local UCSC database for viewing in Genome Browser. In order to display secondary structures for folded RNA you must include 'rnaSecStr' in the table name.
+
+Usage: ... | $script [options] <-d <genome>> <-t <string>>
+
+Options: [-d <genome> | --database=<genome>] - Genome database to upload track to.
+Options: [-t <string> | --table=<string>] - Table name of track - NB! prefix with initials. e.g.: mah_test.
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-s <string> | --short_label=<string>] - Short label for track - Default=<table>
+Options: [-l <string> | --long_label=<string>] - Long label for track - Default=<table>
+Options: [-g <string> | --group=<string>] - Track group name - Default=m.hansen
+Options: [-p <float> | --priority=<float>] - Track display priority - Default=1
+Options: [-u | --use_score] - Use the score to grey shade the track.
+Options: [-v <string> | --visibility=<string>] - Track visibility: hide|dense|squish|pack|full - Default=pack
+Options: [-w | --wiggle] - Create wiggle track based on overlapping sequences.
+Options: [-c <rgb> | --color=<rgb>] - Track color e.g. '147,73,42' - Default=<random>
+Options: [-C <int> | --chunk_size=<int>] - Chunks for loading - Default=10000000
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -d hg18 -t initials_my_test
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Uppercases sequences in stream.
+
+Usage: ... | uppercase_seq [options]
+
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | uppercase_seq - Uppercases all sequences in stream.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: October 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: vmatch sequences in stream against a specified genome.
+
+Usage: ... | $script [options] -g <genome>
+Usage: ... | $script [options] -i <index>
+
+Options: [-g <genome> | --genome=<genome>] - Genome to vmatch.
+Options: [-i <index> | --index_name=<index>] - Custom index to vmatch.
+Options: [-c | --count] - Replace score with hit count.
+Options: [-m | --max_hits] - Skip hits with more than maximum hits (implies --count).
+Options: [-h <string> | --hamming_dist=<string>] - Allow mismatches.
+Options: [-e <string> | --edit_dist=<string>] - Allow mismatches and indels.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -g hg18 - Match sequences in stream against human genome.
+Examples: ... | $script -g hg18 -h 1 - allowing for one mismatch.
+Examples: ... | $script -g hg18 -h 5p - allowing for 5% mismatches.
+Examples: ... | $script -g hg18 -e 2 - allowing for 2 mismatches or indels.
+Examples: ... | $script -g hg18 -e 10p - allowing for 10% mismatches or indels.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: March 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write sequences in 2bit format.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-N | --no_mask] - Ignore soft masking.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -o test.2bit - Write entries to file 'test.2bit'.
+
+Keys in: SEQ_NAME - Sequence name.
+Keys in: Q_ID - Used as sequence name if no SEQ_NAME.
+Keys in: SEQ - Sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write aligned sequences in pretty alignment format.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-w <int> | --wrap=<int>] - Wrap sequences to a given width.
+Options: [-R | --no_ruler] - Suppress ruler for multiple alignments.
+Options: [-C | --no_consensus] - Suppress consensus for multiple alignments.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+
+Examples: ... | $script -x -w 80 - Write entries wrapped to blocks of 80 to STDOUT.
+Examples: ... | $script -x -o test.aln - Write entries to file 'test.aln'.
+
+Keys in: SEQ_NAME - Sequence name.
+Keys in: SEQ - Sequence.
+Keys in: ALIGN - Number specifying what alignment the sequence belongs to.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write records from stream as BED lines.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+Options: [-Z | --compress] - Compress output using gzip.
+
+Examples: ... | $script -x -o table.bed - Output data to file 'table.bed'.
+Examples: ... | $script -Z -o table.bed.gz - Output zipped data to file 'table.bed.gz'.
+
+Keys in: REC_TYPE - Either BED,PSL,PATSCAN,BLAST,VMATCH, otherwise generic.
+Keys in: CHR - Chromosome.
+Keys in: CHR_BEG - Chromosome begin position.
+Keys in: CHR_END - Chromosome end position.
+Keys in: Q_ID - Feature name.
+Keys in: SCORE - Score.
+Keys in: STRAND - Strand.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write BLAST records from stream in BLAST tabular format (-m8 and 9).
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-c | --comment] - Print comment line - Default=no
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+Options: [-Z | --compress] - Compress output using gzip.
+
+Examples: ... | $script -x -c -o blast.tab - Write BLAST table with comment line to 'blast.tab'.
+Examples: ... | $script -x -Z -o blast.tab.gz - Write zipped BLAST table to 'blast.tab.gz'.
+
+Keys in: Q_ID - Query ID.
+Keys in: S_ID - Subject ID.
+Keys in: IDENT - Identity.
+Keys in: ALIGN_LEN - Alignment length.
+Keys in: MISMATCHES - Mismatches.
+Keys in: GAPS - Gaps.
+Keys in: Q_BEG - Query begin.
+Keys in: Q_END - Query end.
+Keys in: S_BEG - Subject begin.
+Keys in: S_END - Subject end.
+Keys in: E_VAL - Expect value.
+Keys in: BIT_SCORE - Bit score.
+Keys in: STRAND - Strand.
+Keys in: REC_TYPE - Record type must be BLAST.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write sequences in FASTA format.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-w <int> | --wrap=<int>] - Wrap sequences to a given width.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+Options: [-Z | --compress] - Compress output using gzip.
+
+Examples: ... | $script -x -w 80 - Write entries wrapped to blocks of 80 to STDOUT.
+Examples: ... | $script -x -o test.fna - Write entries to file 'test.fna'.
+Examples: ... | $script -x -Z -o test.fna.gz - Write zipped entries to file 'test.fna.gz'.
+
+Keys in: SEQ_NAME - Sequence name.
+Keys in: Q_ID - Used as sequence name if no SEQ_NAME.
+Keys in: SEQ - Sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write records from stream in PSL format.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+Options: [-Z | --compress] - Compress output using gzip.
+
+Examples: ... | $script -x -o table.psl - Output data to file 'table.psl'.
+Examples: ... | $script -Z -o table.psl.gz - Output zipped data to file 'table.psl.gz'.
+
+Keys in: MATCHES - Number of non-repeat matches.
+Keys in: MISMATCHES - Number of mismatches.
+Keys in: REPMATCHES - Number of repeat matches.
+Keys in: NCOUNT - Number of Ns.
+Keys in: QNUMINSERT - Number of inserts in query.
+Keys in: QBASEINSERT - Number of bases inserted in query.
+Keys in: SNUMINSERT - Number of inserts in subject.
+Keys in: SBASEINSERT - Number of bases inserted in subject.
+Keys in: STRAND - Strand.
+Keys in: Q_ID - Query ID.
+Keys in: Q_LEN - Query length.
+Keys in: Q_BEG - Query begin.
+Keys in: Q_END - Query end.
+Keys in: S_ID - Subject ID.
+Keys in: S_LEN - Subject length.
+Keys in: S_BEG - Subject begin.
+Keys in: S_END - Subject end.
+Keys in: BLOCKCOUNT - Block count.
+Keys in: BLOCKSIZES - Block sizes.
+Keys in: Q_BEGS - Query sequence blocks begins.
+Keys in: S_BEGS - Subject sequence blocks begins.
+Keys in: SCORE - Score calculated as in web BLAT results.
+Keys in: REC_TYPE - Record type.
+
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: April 2008
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write di-base encoded Solid sequences.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-w <int> | --wrap=<int>] - Wrap sequences to a given width.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+Options: [-Z | --compress] - Compress output using gzip.
+
+Examples: ... | $script -x -w 80 - Write entries wrapped to blocks of 80 to STDOUT.
+Examples: ... | $script -x -o test.solid - Write entries to file 'test.solid'.
+Examples: ... | $script -x -Z -o test.solid.gz - Write zipped entries to file 'test.solid.gz'.
+
+Keys in: SEQ_NAME - Sequence name.
+Keys in: Q_ID - Used as sequence name if no SEQ_NAME.
+Keys in: SEQ - Sequence.
--- /dev/null
+Author: Martin Asser Hansen - Copyright (C) - All rights reserved
+
+Contact: mail@maasha.dk
+
+Date: August 2007
+
+License: GNU General Public License version 2 (http://www.gnu.org/copyleft/gpl.html)
+
+Description: Write records from stream as tab separated table.
+
+Usage: ... | $script [options]
+
+Options: [-x | --no_stream] - Do not emit records.
+Options: [-o <file> | --data_out=<file>] - Write result to file.
+Options: [-c | --comment] - Print comment line - Default=no
+Options: [-d <string> | --delimit=<string>] - Changes delimiter - Default='\t'
+Options: [-k <string> | --keys=<string>] - Comma separated list of keys to print in that order.
+Options: [-K <string> | --no_keys=<string>] - Comma separated list of keys to ignore.
+Options: [-I <file> | --stream_in=<file>] - Read input from stream file - Default=STDIN
+Options: [-O <file> | --stream_out=<file>] - Write output to stream file - Default=STDOUT
+Options: [-Z | --compress] - Compress output using gzip.
+
+Examples: ... | $script -c -o table.csv - Output tabular data to file 'table.csv' with comment line.
+Examples: ... | $script -k SEQ_NAME,SEQ -x - Output tabular data for columns SEQ_NAME and SEQ only.
+Examples: ... | $script -d ',' -K SEQ - Output comma separated data ignoring SEQ column.
+Examples: ... | $script -Z -o test.tab.gz - Output zipped tabular data to file 'test.tab.gz'.
--- /dev/null
+CC = gcc
+# Cflags = -Wall
+# Cflags = -Wall -g -O0 # for valgrind
+Cflags = -Wall -g -pg # for gprof
+
+INC_DIR = inc/
+LIB_DIR = lib/
+
+INC = -I $(INC_DIR)
+
+# Project objects first, system libraries last: the linker resolves symbols
+# left to right, so -lm must follow the objects that use math functions.
+LIB = $(LIB_DIR)*.o -lm
+
+# Every program built by this Makefile (shared by 'all' and 'clean').
+PROGRAMS = test test_revcomp fasta_count test_fasta test_oligo2bin repeat-O-matic test_list test_hash test_split test_binary_search test_biotools test_file_buffer test_bed
+
+# These targets are commands, not files; never treat them as up to date.
+.PHONY: all libs clean
+
+all: libs $(PROGRAMS)
+
+# Build the object files in $(LIB_DIR) that every program links against.
+libs:
+	cd $(LIB_DIR) && ${MAKE} all
+
+# In each rule the source file precedes $(LIB) so that the objects and -lm
+# are seen by the linker after the code that references them.
+
+test: test.c
+	$(CC) $(Cflags) $(INC) test.c $(LIB) -o test
+
+test_revcomp: test_revcomp.c
+	$(CC) $(Cflags) $(INC) test_revcomp.c $(LIB) -o test_revcomp
+
+fasta_count: fasta_count.c
+	$(CC) $(Cflags) $(INC) fasta_count.c $(LIB) -o fasta_count
+
+test_fasta: test_fasta.c
+	$(CC) $(Cflags) $(INC) test_fasta.c $(LIB) -o test_fasta
+
+test_oligo2bin: test_oligo2bin.c
+	$(CC) $(Cflags) $(INC) test_oligo2bin.c $(LIB) -o test_oligo2bin
+
+repeat-O-matic: repeat-O-matic.c
+	$(CC) $(Cflags) $(INC) repeat-O-matic.c $(LIB) -o repeat-O-matic
+
+test_list: test_list.c
+	$(CC) $(Cflags) $(INC) test_list.c $(LIB) -o test_list
+
+test_hash: test_hash.c
+	$(CC) $(Cflags) $(INC) test_hash.c $(LIB) -o test_hash
+
+test_split: test_split.c
+	$(CC) $(Cflags) $(INC) test_split.c $(LIB) -o test_split
+
+test_binary_search: test_binary_search.c
+	$(CC) $(Cflags) $(INC) test_binary_search.c $(LIB) -o test_binary_search
+
+test_biotools: test_biotools.c
+	$(CC) $(Cflags) $(INC) test_biotools.c $(LIB) -o test_biotools
+
+test_file_buffer: test_file_buffer.c $(LIB_DIR)filesys.c
+	$(CC) $(Cflags) $(INC) test_file_buffer.c $(LIB) -o test_file_buffer
+
+test_bed: test_bed.c $(LIB_DIR)ucsc.c
+	$(CC) $(Cflags) $(INC) test_bed.c $(LIB) -o test_bed
+
+# Remove the library objects and every program built above.
+clean:
+	cd $(LIB_DIR) && ${MAKE} clean
+	rm -f $(PROGRAMS)
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+#include "fasta.h"
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MAIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */
+
+
+int main( int argc, char *argv[] )
+{
+    /* Count the FASTA entries in every file named on the command line. */
+    /* Prints one "<file>: <count>" line per file, followed by a        */
+    /* "total: <sum>" line when more than one file was given.           */
+    /* Always returns 0.                                                */
+
+    int   i;
+    int   count;
+    int   total;
+    FILE *fp;
+
+    total = 0;
+
+    for ( i = 1; i < argc; i++ )
+    {
+        fp = read_open( argv[ i ] );
+
+        count = fasta_count( fp );
+
+        close_stream( fp );
+
+        printf( "%s: %d\n", argv[ i ], count );
+
+        total += count;
+    }
+
+    /* Decide on the number of files, not on the counts: comparing total */
+    /* with the last count suppressed the total line whenever all        */
+    /* earlier files held zero entries.                                  */
+    if ( argc > 2 ) {
+        printf( "total: %d\n", total );
+    }
+
+    return 0;
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */
--- /dev/null
+/* Including standard libraries */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <math.h>
+
+/* Define a shorthand for unsigned int */
+#define uint unsigned int
+
+/* Define a boolean type */
+#define bool char
+#define TRUE 1
+#define FALSE 0
+
+/* Macro for resetting the object a pointer refers to, to all \0's. */
+#define ZERO( pt ) ( memset( ( pt ), '\0', sizeof( *( pt ) ) ) )
+
+/* Macro for dynamic allocation of memory sized for the object pt points to. */
+#define MEM_GET( pt ) ( ( pt ) = mem_get( sizeof( *( pt ) ) ) )
+
+/* Macro for cloning a structure in memory. */
+#define MEM_CLONE( pt ) mem_clone( ( pt ), sizeof( ( pt )[ 0 ] ) )
+
+/* Macros for determining min or max of two given values. */
+/* Fully parenthesized so they can be used inside larger expressions. */
+/* NB: each argument is evaluated more than once - avoid side effects. */
+#define MAX( a, b ) ( ( a ) < ( b ) ? ( b ) : ( a ) )
+#define MIN( a, b ) ( ( a ) > ( b ) ? ( b ) : ( a ) )
+
+/* Macros for abs and int functions. */
+/* ABS evaluates its argument twice; INT casts the whole argument expression. */
+#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
+#define INT( x ) ( ( int ) ( x ) )
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> STRUCTURE DECLARATIONS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Singly linked list with a pointer to the next element and a pointer to a value. */
+struct list
+{
+    /* Next element in the list. */
+    struct list *next;
+    /* Untyped pointer to the value held by this element. */
+    void *val;
+};
+
+/* Singly linked list with a pointer to the next element and an integer value. */
+struct list_int
+{
+    /* Next element in the list. */
+    /* NOTE(review): typed as struct list *, not struct list_int * - confirm this is intended. */
+    struct list *next;
+    /* Integer value held by this element. */
+    int val;
+};
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ERROR HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Print "ERROR: <error_msg>" to stderr and exit with status 1. */
+void die( char *error_msg );
+
+/* Print "WARNING: <warn_msg>" to stderr; execution continues. */
+void warn( char *warn_msg );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MEMORY HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Get a pointer with a given size of allocated memory. */
+/* Dies if size is 0 or if the allocation fails. */
+void *mem_get( size_t size );
+
+/* Get a pointer with a given size of allocated and zero'ed memory. */
+/* Dies if size is 0 or if the allocation fails. */
+void *mem_get_zero( size_t size );
+
+/* Resize allocated memory for a given pointer. */
+/* Dies if size is 0 or if the re-allocation fails. */
+void *mem_resize( void* pt, size_t size );
+
+/* Resize allocated memory for a given pointer with extra memory zero'ed. */
+/* Only the bytes from old_size up to new_size are zero'ed, and only when growing. */
+void *mem_resize_zero( void* pt, size_t old_size, size_t new_size );
+
+/* Clone a structure in memory and return a pointer to the clone. */
+/* Copies exactly size bytes; memory referenced by pointers inside is not duplicated. */
+void *mem_clone( void *old_pt, size_t size );
+
+/* Free memory from a given pointer. A NULL pointer is ignored. */
+/* The pointer is passed by value, so the caller's variable is not reset. */
+void mem_free( void *pt );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Binary search an array of integers for an integer value. */
+/* The array must be sorted in ascending order. Returns TRUE if found, else FALSE. */
+bool binary_search_array( int *array, int array_size, int val );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MISC <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Remove the last char from a string (modifies the string in place). */
+void chop( char *string );
+
+/* Remove the last char from a string if the char is a newline (safer than chop). */
+void chomp( char *string );
+
+/* Split a given string on a delimiter and return the split result as a list. */
+void split( char *string, char delimit, struct list **fields );
+
+/* Mockup version of Perl substr. */
+/* NOTE(review): ownership of the returned string is not visible here - confirm whether callers must free it. */
+char *substr( char *string, int offset, int len );
+
+/* Return a binary number as a string of 1's and 0's. */
+/* NOTE(review): ownership of the returned string is not visible here - confirm whether callers must free it. */
+char *bits2string( uint bin );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
--- /dev/null
+/* Buffer size constant for FASTA handling (256 KiB). */
+/* Parenthesized so the product survives use inside larger expressions. */
+#define FASTA_BUFFER ( 256 * 1024 )
+
+/* Structure of a sequence entry. */
+struct seq_entry
+{
+    /* Name of the sequence. */
+    char *seq_name;
+    /* The sequence itself. */
+    char *seq;
+    /* NOTE(review): presumably the length of seq - confirm in fasta.c. */
+    size_t seq_len;
+};
+
+/* Count all entries in a FASTA file given a file pointer. */
+uint fasta_count( FILE *fp );
+
+/* Get next sequence entry from a FASTA file given a file pointer. */
+/* NOTE(review): the bool result presumably tells whether an entry was read - confirm in fasta.c. */
+bool fasta_get_entry( FILE *fp, struct seq_entry *entry );
+
+/* Output a sequence entry in FASTA format. */
+void fasta_put_entry( struct seq_entry *entry );
+
+/* Get all sequence entries from a FASTA file in a list. */
+/* The list is handed back through the entries pointer. */
+void fasta_get_entries( FILE *fp, struct list **entries );
+
+/* Output all sequence entries from a list in FASTA format. */
+void fasta_put_entries( struct list *entries );
+
+/* Deallocates memory from a seq_entry. */
+void fasta_free_entry( struct seq_entry *entry );
--- /dev/null
+/* Size constant for file buffers (1 MiB); a 64 KiB alternative is kept below. */
+/* Parenthesized so the product survives use inside larger expressions. */
+//#define FILE_BUFFER_SIZE ( 64 * 1024 )
+#define FILE_BUFFER_SIZE ( 1024 * 1024 )
+
+/* State of a buffered file reader, as returned by read_open_buffer(). */
+struct file_buffer
+{
+    FILE *fp; /* file pointer */
+    char *str; /* the buffer string */
+    size_t pos; /* index pointing to last position where some token was found */
+    size_t use; /* index indicating how much of the buffer is scanned */
+    size_t end; /* end position of buffer */
+    size_t size; /* default buffer size */
+    bool eof; /* flag indicating that buffer reached EOF */
+};
+
+/* Read-open a file and return a file pointer. */
+FILE *read_open( char *file );
+
+/* Write-open a file and return a file pointer. */
+FILE *write_open( char *file );
+
+/* Append-open a file and return a file pointer. */
+FILE *append_open( char *file );
+
+/* Close a stream defined by a file pointer. */
+void close_stream( FILE *fp );
+
+/* Read in len number of bytes from the current position of a */
+/* file pointer into a string that is allocated and null terminated. */
+/* NOTE(review): the caller presumably owns the returned string - confirm in filesys.c. */
+char *file_read( FILE *fp, size_t len );
+
+/* Delete a file. */
+void file_unlink( char *file );
+
+/* Rename a file. */
+void file_rename( char *old_name, char *new_name );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FILE BUFFER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+/* Opens a file for reading and loads a new buffer.*/
+struct file_buffer *read_open_buffer( char *file );
+
+/* Get the next char from a file buffer, which is resized if necessary, until EOF.*/
+char buffer_getc( struct file_buffer *buffer );
+
+/* Get the next line that is terminated by \n or EOF from a file buffer. */
+char *buffer_gets( struct file_buffer *buffer );
+
+/* Increases buffer size until it is larger than len. */
+/* NOTE(review): len is an int while the buffer indices are size_t - confirm the intended range. */
+void buffer_new_size( struct file_buffer *buffer, int len );
+
+/* Resize file buffer discarding any old buffer before offset, */
+/* and merge remaining old buffer with a new chunk of buffer. */
+void buffer_resize( struct file_buffer *buffer );
+
+/* Deallocates memory and close stream used by file buffer. */
+void buffer_destroy( struct file_buffer *buffer );
+
+/* Debug function that prints the content of a file_buffer. */
+void buffer_print( struct file_buffer *buffer );
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
--- /dev/null
+/* Structure of a generic hash. */
+struct hash
+{
+    /* Array of pointers to hash elements. */
+    struct hash_elem **table;
+    /* NOTE(review): presumably a bit mask reducing a hash key to a table index - confirm in hash.c. */
+    uint mask;
+    /* NOTE(review): presumably the number of slots in table - confirm in hash.c. */
+    int table_size;
+    /* NOTE(review): presumably the number of elements currently stored - confirm in hash.c. */
+    int elem_count;
+};
+
+/* Structure of a generic hash element. */
+struct hash_elem
+{
+    /* Next hash element; elements are linked through this pointer. */
+    struct hash_elem *next;
+    /* Key string of the element. */
+    char *key;
+    /* Untyped pointer to the value of the element. */
+    void *val;
+};
+
+/* Initialize a new generic hash structure. */
+/* NOTE(review): the exact meaning of size (slots, bits, ...) is not visible here - confirm in hash.c. */
+struct hash *hash_new( size_t size );
+
+/* Hash function that generates a hash key from a string. */
+uint hash_key( char *string );
+
+/* Add a new hash element consisting of a key/value pair to an existing hash. */
+void hash_add( struct hash *myhash, char *key, void *val );
+
+/* Lookup a key in a given hash and return the value - or NULL if not found. */
+void *hash_get( struct hash *myhash, char *key );
+
+/* Lookup a key in a given hash and return the hash element - or NULL if not found. */
+struct hash_elem *hash_get_elem( struct hash *myhash, char *key );
+
+/* Remove key/value pair from a given hash. Returns true if a remove was successful. */
+bool hash_del( struct hash *myhash, char *key );
+
+/* Deallocate memory for hash and all hash elements. */
+void hash_destroy( struct hash *myhash );
+
+/* Output some collision stats for a given hash. */
+void hash_collision_stats( struct hash *myhash );
--- /dev/null
+/* Add a new singly linked list element with a pointer. */
+/* The list is passed as a pointer to the list pointer so it can be updated. */
+void list_add( struct list **list_ppt, void *val );
+
+/* Add a new singly linked list element with an integer. */
+/* The list is passed as a pointer to the list pointer so it can be updated. */
+void list_add_int( struct list_int **list_ppt, int val );
+
+/* Reverse the order of elements in a singly linked list. */
+/* NOTE(review): takes an untyped pointer and returns nothing - confirm in list.c what must be passed */
+/* (list pointer or pointer to list pointer) for the caller to see the new head. */
+void list_reverse( void *old_list );
+
+/* Check if a given string exists in a singly linked list. */
+bool list_exists( struct list *list_pt, char *string );
+
+/* Check if a given integer exists in a singly linked list. */
+bool list_exists_int( struct list_int *list_pt, int val );
+
+/* Free memory for all elements of a singly linked list. */
+/* NOTE(review): whether the values pointed to by the elements are freed too is not visible here. */
+void list_free( void *list_pt );
+
+/* Debug function to print all elements from a singly linked list. */
+void list_print( struct list *list_pt );
+
--- /dev/null
+/* Macro to test if a given char is DNA (A, T, C, G or N in either case). */
+/* Evaluates to 1 or 0. Fully parenthesized so it can be used inside larger */
+/* expressions; the argument is evaluated several times - avoid side effects. */
+#define dna_clean( c ) ( ( ( c ) == 'A' || ( c ) == 'a' || ( c ) == 'T' || ( c ) == 't' || ( c ) == 'C' || ( c ) == 'c' || ( c ) == 'G' || ( c ) == 'g' || ( c ) == 'N' || ( c ) == 'n' ) ? 1 : 0 )
+
+/* Macro to test if a given char is RNA (A, U, C, G or N in either case). */
+/* Evaluates to 1 or 0. Fully parenthesized so it can be used inside larger */
+/* expressions; the argument is evaluated several times - avoid side effects. */
+#define rna_clean( c ) ( ( ( c ) == 'A' || ( c ) == 'a' || ( c ) == 'U' || ( c ) == 'u' || ( c ) == 'C' || ( c ) == 'c' || ( c ) == 'G' || ( c ) == 'g' || ( c ) == 'N' || ( c ) == 'n' ) ? 1 : 0 )
+
+/* Uppercase sequence. */
+void uppercase_seq( char *seq );
+
+/* Lowercase sequence. */
+void lowercase_seq( char *seq );
+
+/* Reverse complements DNA sequence. */
+void revcomp_dna( char *seq );
+
+/* Reverse complements RNA sequence. */
+void revcomp_rna( char *seq );
+
+/* Reverse complements nucleotide sequence after guessing the sequence type. */
+void revcomp_nuc( char *seq );
+
+/* Complement DNA sequence. (NB it is not reversed!). */
+void complement_dna( char *seq );
+
+/* Complement RNA sequence. (NB it is not reversed!). */
+void complement_rna( char *seq );
+
+/* Complement nucleotide sequence after guessing the sequence type. */
+void complement_nuc( char *seq );
+
+/* Reverse sequence. */
+void reverse( char *seq );
+
+/* Convert all non-nucleotide letters to Ns. */
+void seq2nuc_simple( char *seq );
+
+/* Convert DNA into RNA by changing t and T to u and U, respectively. */
+void dna2rna( char *seq );
+
+/* Convert RNA into DNA by changing u and U to t and T, respectively. */
+void rna2dna( char *seq );
+
+/* Check if a sequence is DNA by inspecting the first 100 residues. */
+bool is_dna( char *seq );
+
+/* Check if a sequence is RNA by inspecting the first 100 residues. */
+bool is_rna( char *seq );
+
+/* Check if a sequence is protein by inspecting the first 100 residues. */
+bool is_protein( char *seq );
+
+/* Guess if a sequence is DNA, RNA, or protein by inspecting the first 100 residues. */
+/* NOTE(review): the exact strings returned are not visible here - confirm in seq.c. */
+char *seq_guess_type( char *seq );
+
+/* Check if a sequence contains N or n. */
+bool contain_N( char *seq );
+
+/* Pack a nucleotide oligo (max length 15) into a binary/integer (good for hash keys). */
+int oligo2bin( char *oligo );
--- /dev/null
+/* Size constant for BED handling. */
+/* NOTE(review): presumably the line buffer size used when reading BED entries - confirm in ucsc.c. */
+#define BED_BUFFER 1024
+
+/* Minimal BED entry holding three fields: chr, chr_beg and chr_end. */
+struct bed_entry3
+{
+    char *chr;
+    uint chr_beg;
+    uint chr_end;
+};
+
+/* Full BED entry holding twelve fields; the first three match struct bed_entry3. */
+/* NOTE(review): field meanings presumably follow the UCSC BED column definitions - confirm in ucsc.c. */
+struct bed_entry12
+{
+    char *chr;
+    uint chr_beg;
+    uint chr_end;
+    char *q_id;
+    float score;
+    char strand;
+    uint thick_beg;
+    uint thick_end;
+    char *itemrgb;
+    uint blockcount;
+    char *blocksizes;
+    char *q_begs;
+};
+
+/* Get a BED entry from a file pointer. */
+/* NOTE(review): takes a bed_entry3 pointer together with a cols argument - confirm in ucsc.c how */
+/* entries with more than three columns are handled. */
+void bed_get_entry( FILE *fp, struct bed_entry3 *bed, int cols );
+
+/* Split a string into the fields of a bed_entry12. */
+/* NOTE(review): cols presumably gives the number of columns to parse - confirm in ucsc.c. */
+void bed_split( char *string, struct bed_entry12 *bed, int cols );
+
--- /dev/null
+CC = gcc
+# Cflags = -Wall
+Cflags = -Wall -g -pg # gprof
+INC_DIR = -I ../inc/
+
+# Every object built by this Makefile (shared by 'all' and 'clean').
+OBJS = common.o seq.o filesys.o fasta.o list.o hash.o ucsc.o
+
+# These targets are commands, not files; never treat them as up to date.
+.PHONY: all clean
+
+all: $(OBJS)
+
+common.o: common.c
+	$(CC) $(Cflags) $(INC_DIR) -c common.c
+
+seq.o: seq.c
+	$(CC) $(Cflags) $(INC_DIR) -c seq.c
+
+filesys.o: filesys.c
+	$(CC) $(Cflags) $(INC_DIR) -c filesys.c
+
+fasta.o: fasta.c
+	$(CC) $(Cflags) $(INC_DIR) -c fasta.c
+
+list.o: list.c
+	$(CC) $(Cflags) $(INC_DIR) -c list.c
+
+hash.o: hash.c
+	$(CC) $(Cflags) $(INC_DIR) -c hash.c
+
+ucsc.o: ucsc.c
+	$(CC) $(Cflags) $(INC_DIR) -c ucsc.c
+
+# -f: do not fail when some objects are already gone, so that a parent
+# 'make clean' that calls this one can always run to completion.
+clean:
+	rm -f $(OBJS)
--- /dev/null
+#include "common.h"
+#include "list.h"
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ERROR HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+void die( char *msg )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Report a fatal error: write "ERROR: <msg>" and a newline to stderr, */
+    /* then terminate the program with exit status 1.                      */
+
+    FILE *err_stream = stderr;
+
+    fprintf( err_stream, "ERROR: %s\n", msg );
+    exit( 1 );
+}
+
+
+void warn( char *msg )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Report a non-fatal problem: write "WARNING: <msg>" and a newline */
+    /* to stderr. The program keeps running.                            */
+
+    FILE *err_stream = stderr;
+
+    fprintf( err_stream, "WARNING: %s\n", msg );
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MEMORY HANDLING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+void *mem_get( size_t size )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Return a pointer to size bytes of freshly allocated memory.   */
+    /* A request for 0 bytes or a failed allocation is fatal (die).  */
+
+    void *chunk = NULL;
+
+    if ( size == 0 ) {
+        die( "could not allocate 0 bytes of memory." );
+    }
+
+    chunk = malloc( size );
+
+    if ( chunk == NULL ) {
+        die( "could not allocate memory." );
+    }
+
+    return chunk;
+}
+
+
+void *mem_get_zero( size_t size )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Return a pointer to size bytes of allocated memory with every */
+    /* byte set to \0. Allocation and its fatal error handling       */
+    /* (0 bytes requested, allocation failure) are done by mem_get.  */
+
+    void *chunk = mem_get( size );
+
+    memset( chunk, '\0', size );
+
+    return chunk;
+}
+
+
+void *mem_resize( void *pt, size_t size )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Change the size of the memory chunk at pt to size bytes and return */
+    /* the (possibly moved) chunk. A new size of 0 bytes or a failed      */
+    /* re-allocation is fatal (die).                                      */
+
+    void *resized = NULL;
+
+    if ( size == 0 ) {
+        die( "could not re-allocate 0 bytes of memory." );
+    }
+
+    resized = realloc( pt, size );
+
+    if ( resized == NULL ) {
+        die( "could not re-allocate memory." );
+    }
+
+    return resized;
+}
+
+
+void *mem_resize_zero( void *pt, size_t old_size, size_t new_size )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Resize an allocated chunk of memory for a given pointer and zero any extra memory. */
+    /* old_size is the current size of the chunk in bytes, new_size the wanted size.      */
+    /* When growing, the bytes from old_size up to new_size are set to \0; when           */
+    /* shrinking nothing is zero'ed. Returns the (possibly moved) chunk.                  */
+
+    char *pt_new;
+
+    pt_new = mem_resize( pt, new_size );
+
+    /* Byte offsets are computed on a char pointer: arithmetic on void * is */
+    /* a compiler extension and not valid standard C.                       */
+    if ( new_size > old_size ) {
+        memset( pt_new + old_size, '\0', new_size - old_size );
+    }
+
+    return pt_new;
+}
+
+
+void *mem_clone( void *old_pt, size_t size )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Return a freshly allocated copy of the `size` bytes found at */
+    /* `old_pt`. Allocation goes through mem_get().                 */
+
+    return memcpy( mem_get( size ), old_pt, size );    /* memcpy returns the destination */
+}
+
+
+void mem_free( void *pt )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Release memory obtained from the allocation functions above.     */
+    /* Passing NULL is allowed and does nothing (free() guarantees it). */
+    /*                                                                  */
+    /* `pt` is passed by value, so the caller's own pointer cannot be   */
+    /* reset from here: callers that keep the variable around must set  */
+    /* it to NULL themselves.                                           */
+
+    free( pt );
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ARRAYS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+bool binary_search_array( int *array, int array_size, int val )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Binary search for `val` in `array`, which holds `array_size`     */
+    /* integers and must be sorted in ascending order for the search to */
+    /* be meaningful. Returns TRUE if the value is present, otherwise   */
+    /* FALSE (also for an empty array).                                 */
+
+    int high;
+    int low;
+    int try;
+
+    high = array_size;    /* the search window is the half-open range [ low, high ) */
+    low  = 0;
+
+    while ( low < high )
+    {
+        /* Midpoint computed without forming high + low, which could */
+        /* overflow an int for very large arrays.                    */
+
+        try = low + ( ( high - low ) / 2 );
+
+        if ( val < array[ try ] ) {
+            high = try;
+        } else if ( val > array[ try ] ) {
+            low = try + 1;
+        } else {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MISC <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+void chop( char *string )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Remove the last char from a string in place.                   */
+    /* An empty string is left untouched; without this guard the byte */
+    /* in front of the string would be overwritten.                   */
+
+    size_t len;
+
+    len = strlen( string );
+
+    if ( len > 0 ) {
+        string[ len - 1 ] = '\0';
+    }
+}
+
+
+void chomp( char *string )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Remove the last char from a string in place if that char is a  */
+    /* newline. An empty string is left untouched; without this guard */
+    /* the byte in front of the string would be inspected and could   */
+    /* be overwritten.                                                */
+
+    size_t len;
+
+    len = strlen( string );
+
+    if ( len > 0 && string[ len - 1 ] == '\n' ) {
+        string[ len - 1 ] = '\0';
+    }
+}
+
+
+void split( char *string, char delimit, struct list **fields )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Split `string` on every occurrence of `delimit` and add each field */
+    /* to the list `*fields` as a newly allocated, NUL terminated string. */
+    /* Adjacent delimiters yield empty fields, and a string without any   */
+    /* delimiter yields one field. The list is reversed at the end so the */
+    /* fields added here appear in their left-to-right order.             */
+    /*                                                                    */
+    /* Fields are copied straight out of `string`, so there is no upper   */
+    /* limit on the length of a field.                                    */
+
+    size_t  beg;           /* index of the first char of the current field */
+    size_t  i;
+    size_t  field_len;
+    char   *field_copy;
+
+    beg = 0;
+
+    for ( i = 0; ; i++ )
+    {
+        /* A field ends at a delimiter or at the end of the string. */
+
+        if ( string[ i ] == delimit || string[ i ] == '\0' )
+        {
+            field_len = i - beg;
+
+            field_copy = mem_get( field_len + 1 );
+
+            memcpy( field_copy, &string[ beg ], field_len );
+
+            field_copy[ field_len ] = '\0';
+
+            list_add( fields, field_copy );
+
+            if ( string[ i ] == '\0' ) {
+                break;
+            }
+
+            beg = i + 1;
+        }
+    }
+
+    list_reverse( fields );
+}
+
+
+char *substr( char *string, int offset, int len )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Simplified equivalent of Perl's substr: return a newly allocated, */
+    /* NUL terminated copy of the `len` chars of `string` that start at  */
+    /* index `offset`. Both arguments are required and must be           */
+    /* non-negative; Perl's replacement feature is not implemented.      */
+    /* Requests that reach outside the string are fatal via die().       */
+
+    size_t  string_len;
+    char   *sub;
+
+    string_len = strlen( string );
+
+    /* The last check is written as a subtraction so that a huge */
+    /* offset + len cannot overflow and slip past the test.      */
+
+    if ( offset < 0 ) {
+        die( "substr offset < 0." );
+    } else if ( len < 0 ) {
+        die( "substr length < 0." );
+    } else if ( ( size_t ) offset > string_len ) {
+        die( "substr offset outside string." );
+    } else if ( ( size_t ) len > string_len - ( size_t ) offset ) {
+        die( "substr offset + len outside string." );
+    }
+
+    sub = mem_get( ( size_t ) len + 1 );
+
+    memcpy( sub, string + offset, ( size_t ) len );
+
+    sub[ len ] = '\0';
+
+    return sub;
+}
+
+
+char *bits2string( uint bin )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Return `bin` as a newly allocated string of 1's and 0's, most      */
+    /* significant bit first. The string has sizeof( uint ) * 8 digits,   */
+    /* so the output width follows the size of uint instead of assuming   */
+    /* 32 bits.                                                           */
+
+    size_t  bits;
+    size_t  i;
+    char   *string;
+
+    bits = sizeof( uint ) * 8;
+
+    string = mem_get( bits + 1 );
+
+    /* Peel off the lowest bit each round and fill the string from the right. */
+
+    for ( i = 0; i < bits; i++ )
+    {
+        string[ bits - 1 - i ] = ( ( bin & 1 ) != 0 ) ? '1' : '0';
+
+        bin >>= 1;
+    }
+
+    string[ bits ] = '\0';
+
+    return string;
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
--- /dev/null
+#include "common.h"
+#include "fasta.h"
+#include "list.h"
+
+
+uint fasta_count( FILE *fp )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Count all entries in a FASTA file given a file pointer, i.e. the */
+    /* number of lines that begin with '>'. Reads from the current file */
+    /* position until EOF.                                              */
+
+    char    buffer[ FASTA_BUFFER ];
+    size_t  len;
+    uint    count;
+    bool    line_start;
+
+    count      = 0;
+    line_start = TRUE;
+
+    while ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+    {
+        /* fgets splits lines longer than the buffer into several chunks. */
+        /* Only a chunk that begins a line may be counted as a header.    */
+
+        if ( line_start && buffer[ 0 ] == '>' ) {
+            count++;
+        }
+
+        len = strlen( buffer );
+
+        line_start = ( len > 0 && buffer[ len - 1 ] == '\n' ) ? TRUE : FALSE;
+    }
+
+    return count;
+}
+
+
+bool fasta_get_entry( FILE *fp, struct seq_entry *entry )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Get the next sequence entry from a FASTA file given a file pointer. */
+    /*                                                                     */
+    /* `entry` must point to a seq_entry allocated by the caller. On       */
+    /* success its seq_name, seq and seq_len members are filled with newly */
+    /* allocated strings and the sequence length, and TRUE is returned.    */
+    /* FALSE is returned when no further header line is found; `entry` is  */
+    /* then left untouched.                                                */
+    /*                                                                     */
+    /* The sequence is read in two passes: first the residues up to the    */
+    /* next header (or EOF) are counted, then the file is rewound to just  */
+    /* after the header and exactly that many residues are copied. Only    */
+    /* chars with codes 33-126 count as residues. An entry without any     */
+    /* residues is returned with an empty sequence.                        */
+
+    int     i;
+    size_t  j;
+    long    offset;
+    size_t  seq_len;
+    size_t  buffer_len;
+    size_t  name_len;
+    char    buffer[ FASTA_BUFFER ];
+    char   *seq_name = NULL;
+    char   *seq      = NULL;
+
+    /* ---- Skip ahead until header line and include header ---- */
+
+    while ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+    {
+        if ( buffer[ 0 ] == '>' )
+        {
+            buffer_len = strlen( buffer );
+
+            name_len = buffer_len - 1;    /* everything after the '>' */
+
+            if ( name_len > 0 && buffer[ buffer_len - 1 ] == '\n' ) {
+                name_len--;               /* drop the trailing newline */
+            }
+
+            seq_name = mem_get_zero( name_len + 1 );
+
+            memcpy( seq_name, &buffer[ 1 ], name_len );
+
+            break;
+        }
+    }
+
+    if ( seq_name == NULL ) {
+        return FALSE;    /* EOF reached without finding a header */
+    }
+
+    /* Remember where the sequence starts so it can be re-read below. */
+
+    if ( ( offset = ftell( fp ) ) < 0 ) {
+        die( "ftell failed." );
+    }
+
+    /* ---- Determine length of sequence ---- */
+
+    seq_len = 0;
+
+    while ( fgets( buffer, sizeof( buffer ), fp ) != NULL )
+    {
+        if ( buffer[ 0 ] == '>' ) {
+            break;    /* next entry begins; its header is not sequence */
+        }
+
+        for ( i = 0; buffer[ i ]; i++ )
+        {
+            if ( buffer[ i ] > 32 && buffer[ i ] < 127 ) {
+                seq_len++;
+            }
+        }
+    }
+
+    /* ---- Allocate memory for sequence ---- */
+
+    seq = mem_get_zero( seq_len + 1 );
+
+    /* ---- Rewind file pointer and read sequence ---- */
+
+    if ( fseek( fp, offset, SEEK_SET ) < 0 ) {
+        die( "fseek SEEK_SET failed." );
+    }
+
+    j = 0;
+
+    /* Stop reading as soon as the last residue has been copied, so the */
+    /* header of the following entry is still unread for the next call. */
+
+    while ( j < seq_len && fgets( buffer, sizeof( buffer ), fp ) != NULL )
+    {
+        for ( i = 0; buffer[ i ] && j < seq_len; i++ )
+        {
+            if ( buffer[ i ] > 32 && buffer[ i ] < 127 ) {
+                seq[ j++ ] = buffer[ i ];
+            }
+        }
+    }
+
+    seq[ j ] = '\0';
+
+    entry->seq_name = seq_name;
+    entry->seq      = seq;
+    entry->seq_len  = seq_len;
+
+    return TRUE;
+}
+
+
+void fasta_put_entry( struct seq_entry *entry )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Write one sequence entry to stdout as a FASTA record: a header */
+    /* line ">" + seq_name followed by the sequence on a single line. */
+
+    printf( ">%s\n%s\n", entry->seq_name, entry->seq );
+}
+
+
+void fasta_get_entries( FILE *fp, struct list **entries )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Given a file pointer to a FASTA file, retrieve all sequence */
+    /* entries and insert them in the list `*entries`, which is    */
+    /* reversed at the end so the entries appear in file order.    */
+
+    struct seq_entry *entry;
+
+    while ( 1 )
+    {
+        MEM_GET( entry );
+
+        if ( ! fasta_get_entry( fp, entry ) )
+        {
+            /* No more entries: release the struct allocated for this */
+            /* round, it was never added to the list.                 */
+
+            mem_free( entry );
+
+            break;
+        }
+
+        list_add( entries, entry );
+    }
+
+    list_reverse( entries );
+}
+
+
+void fasta_put_entries( struct list *entries )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Walk the list from head to tail and print the value of every */
+    /* element as a FASTA record using fasta_put_entry().           */
+
+    struct list *elem = entries;
+
+    while ( elem != NULL )
+    {
+        fasta_put_entry( elem->val );
+
+        elem = elem->next;
+    }
+}
+
+
+void fasta_free_entry( struct seq_entry *entry )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Deallocate a seq_entry together with its name and sequence  */
+    /* strings. A NULL entry is accepted and ignored, in line with */
+    /* mem_free().                                                 */
+
+    if ( entry == NULL ) {
+        return;
+    }
+
+    mem_free( entry->seq_name );
+    mem_free( entry->seq );
+    mem_free( entry );
+}
+
+
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+
+
+FILE *read_open( char *file )
+{
+    /* Martin A. Hansen, November 2005 */
+
+    /* Given a file name, read-opens the file and returns a file       */
+    /* pointer. Failure to open the file is fatal via die().           */
+
+    FILE *fp;
+    char  msg[ 1024 ];    /* storage for the error message (truncated if too long) */
+
+    if ( ( fp = fopen( file, "r" ) ) == NULL )
+    {
+        snprintf( msg, sizeof( msg ), "Could not read-open file '%s'.", file );
+
+        die( msg );
+    }
+
+    return fp;
+}
+
+
+FILE *write_open( char *file )
+{
+    /* Martin A. Hansen, November 2005 */
+
+    /* Given a file name, write-opens the file (creating or truncating */
+    /* it, as fopen mode "w" does) and returns a file pointer.         */
+    /* Failure to open the file is fatal via die().                    */
+
+    FILE *fp;
+    char  msg[ 1024 ];    /* storage for the error message (truncated if too long) */
+
+    if ( ( fp = fopen( file, "w" ) ) == NULL )
+    {
+        snprintf( msg, sizeof( msg ), "Could not write-open file '%s'.", file );
+
+        die( msg );
+    }
+
+    return fp;
+}
+
+
+FILE *append_open( char *file )
+{
+    /* Martin A. Hansen, November 2005 */
+
+    /* Given a file name, append-opens the file and returns a file */
+    /* pointer. Failure to open the file is fatal via die().       */
+
+    FILE *fp;
+    char  msg[ 1024 ];    /* storage for the error message (truncated if too long) */
+
+    if ( ( fp = fopen( file, "a" ) ) == NULL )
+    {
+        snprintf( msg, sizeof( msg ), "Could not append-open file '%s'.", file );
+
+        die( msg );
+    }
+
+    return fp;
+}
+
+
+void close_stream( FILE *fp )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Close the stream or file behind `fp`. A failing fclose is */
+    /* fatal via die().                                          */
+
+    int status = fclose( fp );
+
+    if ( status != 0 ) {
+        die( "Could not close stream." );
+    }
+}
+
+
+char *file_read( FILE *fp, size_t len )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Read up to `len` bytes from the current position of a file pointer */
+    /* into a newly allocated string. The string is NUL terminated right  */
+    /* after the bytes actually read, so a short read near EOF yields a   */
+    /* shorter string. A read error is fatal via die().                   */
+
+    char   *string;
+    size_t  got;
+
+    string = mem_get( len + 1 );
+
+    /* Element size 1 makes fread report the number of bytes read. */
+
+    got = fread( string, 1, len, fp );
+
+    if ( ferror( fp ) != 0 ) {
+        die( "fread failed." );
+    }
+
+    string[ got ] = '\0';    /* got <= len, so this stays inside the allocation */
+
+    return string;
+}
+
+
+void file_unlink( char *file )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Delete a file. Failure is fatal via die(). */
+
+    char msg[ 1024 ];    /* storage for the error message (truncated if too long) */
+
+    if ( unlink( file ) == -1 )
+    {
+        snprintf( msg, sizeof( msg ), "Could not delete file '%s'.", file );
+
+        die( msg );
+    }
+}
+
+
+void file_rename( char *old_name, char *new_name )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Rename a file. Failure is fatal via die(). */
+
+    char msg[ 2048 ];    /* storage for the error message (truncated if too long) */
+
+    if ( rename( old_name, new_name ) == -1 )
+    {
+        snprintf( msg, sizeof( msg ), "Could not rename file '%s' -> '%s'.", old_name, new_name );
+
+        die( msg );
+    }
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> FILE BUFFER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
+
+
+struct file_buffer *read_open_buffer( char *file )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Open `file` for reading and return a new file_buffer that is   */
+    /* pre-loaded with the first FILE_BUFFER_SIZE bytes requested     */
+    /* from file_read(). The read positions start at 0, `end` is the  */
+    /* string length of the loaded data, and `eof` records whether    */
+    /* the stream reported end-of-file after that first read.         */
+
+    struct file_buffer *buffer;
+
+    MEM_GET( buffer );
+
+    buffer->fp   = read_open( file );
+    buffer->str  = file_read( buffer->fp, FILE_BUFFER_SIZE );
+    buffer->eof  = feof( buffer->fp ) ? TRUE : FALSE;
+    buffer->pos  = 0;
+    buffer->use  = 0;
+    buffer->end  = strlen( buffer->str );
+    buffer->size = FILE_BUFFER_SIZE;
+
+    return buffer;
+}
+
+
+char buffer_getc( struct file_buffer *buffer )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Get the next char from a file buffer, refilling the buffer from */
+    /* the file when all loaded data has been used. Returns '\0' once  */
+    /* the data is exhausted and the file has reached EOF.             */
+
+    /* Re-check after every refill: a refill may add no data at all    */
+    /* (EOF), and returning a char then would step `use` past `end`.   */
+
+    while ( buffer->use == buffer->end )
+    {
+        if ( buffer->eof ) {
+            return '\0';
+        }
+
+        buffer->pos = buffer->use;    /* everything before `use` may be discarded */
+
+        buffer_new_size( buffer, buffer->use );
+        buffer_resize( buffer );
+    }
+
+    return buffer->str[ buffer->use++ ];
+}
+
+
+char *buffer_gets( struct file_buffer *buffer )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Get the next line that is terminated by \n or EOF from a file     */
+    /* buffer. The line is returned as a newly allocated, NUL terminated */
+    /* string that includes the newline when there is one. Returns NULL  */
+    /* when no data is left and the file has reached EOF.                */
+
+    char   *beg;
+    char   *pt;
+    char   *line;
+    size_t  avail;
+    size_t  line_size;
+
+    while ( 1 )
+    {
+        /* Recomputed every round: buffer_resize() may move buffer->str. */
+
+        beg   = &buffer->str[ buffer->use ];
+        avail = buffer->end - buffer->use;
+
+        if ( ( pt = memchr( beg, '\n', avail ) ) != NULL )
+        {
+            line_size = pt - beg + 1;
+        }
+        else if ( ! buffer->eof )
+        {
+            buffer_resize( buffer );    /* no newline yet: load more data and retry */
+
+            continue;
+        }
+        else if ( avail > 0 )
+        {
+            line_size = avail;          /* last line of the file lacks a newline */
+        }
+        else
+        {
+            return NULL;
+        }
+
+        line = mem_get( line_size + 1 );    /* + 1 for the terminating NUL */
+
+        memcpy( line, beg, line_size );
+
+        line[ line_size ] = '\0';
+
+        buffer->use += line_size;
+
+        buffer_new_size( buffer, line_size );
+
+        return line;
+    }
+}
+
+
+void buffer_new_size( struct file_buffer *buffer, int len )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Double buffer->size repeatedly until it is no longer smaller than */
+    /* `len`. Only the size field is changed here; no memory is          */
+    /* reallocated. If doubling leaves a size that is not positive the   */
+    /* program is terminated via die().                                  */
+
+    while ( buffer->size < len )
+    {
+        if ( ( buffer->size <<= 1 ) <= 0 ) {
+            die( "buffer_new_size failed." );
+        }
+    }
+}
+
+
+void buffer_resize( struct file_buffer *buffer )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Load up to buffer->size more bytes from the file and append them  */
+    /* to the buffer string. Data in front of buffer->pos is discarded   */
+    /* first by shifting the remaining data to the start of the string;  */
+    /* `use` and `end` are adjusted by the same amount. The `eof` flag   */
+    /* is refreshed from the stream after the read.                      */
+
+    char   *str;
+    size_t  str_len;
+    size_t  new_end;
+
+    str = file_read( buffer->fp, buffer->size );
+
+    str_len = strlen( str );
+
+    buffer->eof = feof( buffer->fp ) ? TRUE : FALSE;
+
+    if ( buffer->pos != 0 )
+    {
+        /* Keep everything from pos up to end, including data that has */
+        /* been loaded but not yet handed out (use .. end).            */
+
+        memmove( buffer->str, &buffer->str[ buffer->pos ], buffer->end - buffer->pos );
+
+        buffer->use -= buffer->pos;
+        buffer->end -= buffer->pos;
+        buffer->pos  = 0;
+    }
+
+    new_end = buffer->end + str_len;
+
+    buffer->str = mem_resize( buffer->str, new_end + 1 );
+
+    memcpy( &buffer->str[ buffer->end ], str, str_len );
+
+    buffer->str[ new_end ] = '\0';    /* last valid index of the new_end + 1 byte allocation */
+
+    buffer->end = new_end;
+
+    mem_free( str );
+}
+
+
+void buffer_destroy( struct file_buffer *buffer )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Tear down a file buffer: close its stream, then release the */
+    /* buffered string and the buffer struct itself.               */
+
+    close_stream( buffer->fp );
+    mem_free( buffer->str );
+    mem_free( buffer );
+}
+
+
+void buffer_print( struct file_buffer *buffer )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Debug helper: dump the positions, the EOF flag, the buffered */
+    /* string and its string length to stdout in one report.        */
+
+    printf( "buffer: {\n"
+            " pos : %lu\n"
+            " use : %lu\n"
+            " end : %lu\n"
+            " eof : %d\n"
+            " str : ->%s<-\n"
+            " str_len: %lu\n"
+            "}\n",
+            buffer->pos,
+            buffer->use,
+            buffer->end,
+            buffer->eof,
+            buffer->str,
+            strlen( buffer->str ) );
+}
+
+
+/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
--- /dev/null
+#include "common.h"
+#include "hash.h"
+#include "list.h"
+
+
+struct hash *hash_new( size_t size )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Initialize a new generic hash structure with 2 ** size buckets. */
+    /* `size` is the exponent, not the number of buckets. All buckets  */
+    /* start out empty (NULL). An exponent too large for the int table */
+    /* size is fatal via die().                                        */
+
+    struct hash *new_hash;
+    int          table_size;
+
+    if ( size >= ( sizeof( int ) * 8 ) - 1 ) {
+        die( "hash_new: table size exponent too large." );
+    }
+
+    MEM_GET( new_hash );
+
+    table_size = 1 << size;    /* table_size = ( 2 ** size ) */
+
+    new_hash->table_size = table_size;
+    new_hash->mask       = table_size - 1;
+
+    /* The table must be zeroed: lookups walk each bucket chain until */
+    /* they meet a NULL pointer.                                      */
+
+    new_hash->table = mem_get_zero( sizeof( struct hash_elem * ) * table_size );
+
+    new_hash->elem_count = 0;
+
+    return new_hash;
+}
+
+
+void hash_add( struct hash *myhash, char *key, void *val )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Add a key/value pair to an existing hash. If the key is already */
+    /* present its value is replaced; otherwise a new element holding  */
+    /* a private copy of the key is put at the head of its bucket.     */
+    /* The value pointer is stored as is, it is not copied.            */
+
+    struct hash_elem *old_elem;
+    struct hash_elem *new_elem;
+    int               hash_index;
+
+    if ( ( old_elem = hash_get_elem( myhash, key ) ) != NULL )
+    {
+        old_elem->val = val;
+    }
+    else
+    {
+        MEM_GET( new_elem );
+
+        hash_index = ( hash_key( key ) & myhash->mask );
+
+        /* + 1 copies the terminating NUL too; the lookups compare */
+        /* keys with strcmp and need a terminated string.          */
+
+        new_elem->key  = mem_clone( key, strlen( key ) + 1 );
+        new_elem->val  = val;
+        new_elem->next = myhash->table[ hash_index ];
+
+        myhash->table[ hash_index ] = new_elem;
+        myhash->elem_count++;
+    }
+}
+
+
+void *hash_get( struct hash *myhash, char *key )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Return the value stored under `key` in the given hash, or NULL */
+    /* when the key is not present. The bucket search is done by      */
+    /* hash_get_elem().                                               */
+
+    struct hash_elem *elem = hash_get_elem( myhash, key );
+
+    if ( elem == NULL ) {
+        return NULL;
+    }
+
+    return elem->val;
+}
+
+
+struct hash_elem *hash_get_elem( struct hash *myhash, char *key )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Find the hash element whose key equals `key` (compared with     */
+    /* strcmp) by walking the bucket chain selected by the masked hash */
+    /* key. Returns the element, or NULL when the key is not present.  */
+
+    struct hash_elem *elem;
+    uint              slot = hash_key( key ) & myhash->mask;
+
+    for ( elem = myhash->table[ slot ]; elem != NULL; elem = elem->next )
+    {
+        if ( strcmp( elem->key, key ) == 0 ) {
+            break;
+        }
+    }
+
+    return elem;
+}
+
+
+bool hash_del( struct hash *myhash, char *key )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Remove the key/value pair stored under `key` from a given hash.  */
+    /* The element is unlinked from its bucket chain and the element    */
+    /* and its key copy are freed. The value is not freed; it stays the */
+    /* caller's responsibility. Returns TRUE if an element was removed, */
+    /* FALSE if the key was not found.                                  */
+
+    struct hash_elem **link;      /* the pointer that points at the current element */
+    struct hash_elem  *bucket;
+
+    link = &myhash->table[ ( hash_key( key ) & myhash->mask ) ];
+
+    while ( ( bucket = *link ) != NULL )
+    {
+        if ( strcmp( bucket->key, key ) == 0 )
+        {
+            *link = bucket->next;    /* bypass the element in the chain */
+
+            mem_free( bucket->key );
+            mem_free( bucket );
+
+            myhash->elem_count--;
+
+            return TRUE;
+        }
+
+        link = &bucket->next;
+    }
+
+    return FALSE;
+}
+
+
+void hash_destroy( struct hash *myhash )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Deallocate memory for hash and all hash elements, including the */
+    /* key copies. Values are not freed here (see the disabled line    */
+    /* below), so the caller remains responsible for them.             */
+
+    int               i;
+    struct hash_elem *bucket;
+    struct hash_elem *next;
+
+    for ( i = 0; i < myhash->table_size; i++ )
+    {
+        for ( bucket = myhash->table[ i ]; bucket != NULL; bucket = next )
+        {
+            next = bucket->next;    /* fetch the link before the element is freed */
+
+            mem_free( bucket->key );
+//            mem_free( bucket->val );
+            mem_free( bucket );
+        }
+    }
+
+    mem_free( myhash->table );
+    mem_free( myhash );
+}
+
+
+uint hash_key( char *string )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Hash function that generates a hash key, */
+    /* based on the Jim Kent's stuff.           */
+
+    /* For every char of the string, in order, the running sum grows */
+    /* by eight times itself plus the char value.                    */
+
+    uint  sum = 0;
+    char *pt;
+
+    for ( pt = string; *pt != '\0'; pt++ ) {
+        sum += ( sum << 3 ) + ( int ) *pt;
+    }
+
+    return sum;
+}
+
+
+void hash_collision_stats( struct hash *myhash )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* For every bucket of the hash table, print the number of elements */
+    /* chained in that bucket as a "Col: <n>" record followed by a      */
+    /* "---" line.                                                      */
+
+    /* Use with biotools: ... | plot_histogram -k Col -x */
+
+    int               slot;
+    int               chain_len;
+    struct hash_elem *elem;
+
+    for ( slot = 0; slot < myhash->table_size; slot++ )
+    {
+        elem = myhash->table[ slot ];
+
+        for ( chain_len = 0; elem != NULL; chain_len++ ) {
+            elem = elem->next;
+        }
+
+        printf( "Col: %d\n---\n", chain_len );
+    }
+}
--- /dev/null
+#include "common.h"
+#include "list.h"
+
+
+void list_add( struct list **list_ppt, void *val )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Allocate a new element holding the pointer `val` and make it */
+    /* the new head of the singly linked list `*list_ppt`.          */
+
+    struct list *node;
+
+    MEM_GET( node );
+
+    node->val  = val;
+    node->next = *list_ppt;
+
+    *list_ppt = node;
+}
+
+
+void list_add_int( struct list_int **list_ppt, int val )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Allocate a new element holding the integer `val` and make it */
+    /* the new head of the singly linked list `*list_ppt`.          */
+
+    struct list_int *node;
+
+    MEM_GET( node );
+
+    node->val  = val;
+    node->next = *list_ppt;
+
+    *list_ppt = node;
+}
+
+
+void list_reverse( void *old_list )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Reverse a singly linked list in place. `old_list` is the address */
+    /* of the list head pointer (it is treated as a struct list **);    */
+    /* on return the head pointer refers to the former last element.    */
+
+    struct list **head     = ( struct list ** ) old_list;
+    struct list  *reversed = NULL;
+    struct list  *node     = *head;
+    struct list  *rest;
+
+    while ( node != NULL )
+    {
+        rest = node->next;        /* remember the unprocessed tail    */
+
+        node->next = reversed;    /* push node onto the reversed list */
+        reversed   = node;
+
+        node = rest;
+    }
+
+    *head = reversed;
+}
+
+
+bool list_exists( struct list *list_pt, char *string )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Check if a given string exists in a singly linked list whose    */
+    /* values are strings (compared with strcmp). Returns TRUE on the  */
+    /* first match, FALSE if no element matches or the list is empty.  */
+
+    struct list *elem;
+
+    /* No allocation is needed: elem only walks the existing elements. */
+
+    for ( elem = list_pt; elem != NULL; elem = elem->next )
+    {
+        if ( strcmp( elem->val, string ) == 0 ) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+
+bool list_exists_int( struct list_int *list_pt, int val )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Check if a given integer exists in a singly linked list of     */
+    /* integers. Returns TRUE on the first match, FALSE if no element */
+    /* matches or the list is empty.                                  */
+
+    struct list_int *elem;
+
+    /* No allocation is needed: elem only walks the existing elements. */
+
+    for ( elem = list_pt; elem != NULL; elem = elem->next )
+    {
+        if ( elem->val == val ) {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+
+void list_free( void *list_pt )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Free memory for all elements of a singly linked list. `list_pt` */
+    /* is the address of the list head pointer (it is treated as a     */
+    /* struct list **). The values held by the elements are not freed. */
+    /* On return the caller's head pointer is NULL, so the list can be */
+    /* reused or freed again without touching released memory.         */
+
+    struct list **ppt  = ( struct list ** ) list_pt;
+    struct list  *next = *ppt;
+    struct list  *elem;
+
+    while ( next != NULL )
+    {
+        elem = next;
+        next = elem->next;    /* fetch the link before the element is freed */
+
+        mem_free( elem );
+    }
+
+    *ppt = NULL;
+}
+
+
+void list_print( struct list *list_pt )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Debug helper: print every element of a singly linked list to */
+    /* stdout with its 0-based index, treating each value as a      */
+    /* string.                                                      */
+
+    struct list *elem  = list_pt;
+    int          index = 0;
+
+    while ( elem != NULL )
+    {
+        printf( "elem %d: ->%s<-\n", index, ( char * ) elem->val );
+
+        index++;
+        elem = elem->next;
+    }
+}
+
--- /dev/null
+#include "common.h"
+#include "seq.h"
+
+
+void uppercase_seq( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Uppercase a sequence in place. */
+
+    size_t i;
+
+    /* The cast keeps the argument inside the range toupper accepts */
+    /* even where plain char is signed and a byte is >= 0x80.       */
+
+    for ( i = 0; seq[ i ]; i++ ) {
+        seq[ i ] = ( char ) toupper( ( unsigned char ) seq[ i ] );
+    }
+}
+
+
+void lowercase_seq( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Lowercase a sequence in place. */
+
+    size_t i;
+
+    /* The cast keeps the argument inside the range tolower accepts */
+    /* even where plain char is signed and a byte is >= 0x80.       */
+
+    for ( i = 0; seq[ i ]; i++ ) {
+        seq[ i ] = ( char ) tolower( ( unsigned char ) seq[ i ] );
+    }
+}
+
+
+void revcomp_dna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* In-place reverse complement of a DNA sequence: every residue is */
+    /* complemented with complement_dna(), then the string is reversed. */
+
+    complement_dna( seq );
+    reverse( seq );
+}
+
+
+void revcomp_rna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* In-place reverse complement of an RNA sequence: every residue is */
+    /* complemented with complement_rna(), then the string is reversed. */
+
+    complement_rna( seq );
+    reverse( seq );
+}
+
+
+void revcomp_nuc( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* In-place reverse complement of a nucleotide sequence whose type */
+    /* (DNA or RNA) is determined by complement_nuc(). The complement  */
+    /* step runs first, so the type is judged on the original order.   */
+
+    complement_nuc( seq );
+    reverse( seq );
+}
+
+
+void complement_nuc( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Complement a nucleotide sequence in place after guessing its */
+    /* type: DNA is tried first, then RNA. A sequence that is       */
+    /* neither is fatal via die().                                  */
+
+    if ( is_dna( seq ) )
+    {
+        complement_dna( seq );
+
+        return;
+    }
+
+    if ( is_rna( seq ) )
+    {
+        complement_rna( seq );
+
+        return;
+    }
+
+    die( "Complement nuc failed.\n" );
+}
+
+
+void complement_dna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Complements a DNA sequence in place including ambiguity coded */
+    /* nucleotides. Letter case is preserved, U/u is complemented to */
+    /* A/a, and chars without an entry below are left unchanged.     */
+
+    size_t i;
+
+    for ( i = 0; seq[ i ]; i++ )
+    {
+        switch ( seq[ i ] )
+        {
+            case 'a': seq[ i ] = 't'; break;
+            case 'A': seq[ i ] = 'T'; break;
+            case 'c': seq[ i ] = 'g'; break;
+            case 'C': seq[ i ] = 'G'; break;
+            case 'g': seq[ i ] = 'c'; break;
+            case 'G': seq[ i ] = 'C'; break;
+            case 't': seq[ i ] = 'a'; break;
+            case 'u': seq[ i ] = 'a'; break;
+            case 'T': seq[ i ] = 'A'; break;
+            case 'U': seq[ i ] = 'A'; break;
+            case 'm': seq[ i ] = 'k'; break;
+            case 'M': seq[ i ] = 'K'; break;
+            case 'r': seq[ i ] = 'y'; break;
+            case 'R': seq[ i ] = 'Y'; break;
+            case 'w': seq[ i ] = 'w'; break;
+            case 'W': seq[ i ] = 'W'; break;
+            case 's': seq[ i ] = 's'; break;    /* S is its own complement; keep the case */
+            case 'S': seq[ i ] = 'S'; break;
+            case 'y': seq[ i ] = 'r'; break;
+            case 'Y': seq[ i ] = 'R'; break;
+            case 'k': seq[ i ] = 'm'; break;
+            case 'K': seq[ i ] = 'M'; break;
+            case 'b': seq[ i ] = 'v'; break;
+            case 'B': seq[ i ] = 'V'; break;
+            case 'd': seq[ i ] = 'h'; break;
+            case 'D': seq[ i ] = 'H'; break;
+            case 'h': seq[ i ] = 'd'; break;
+            case 'H': seq[ i ] = 'D'; break;
+            case 'v': seq[ i ] = 'b'; break;
+            case 'V': seq[ i ] = 'B'; break;
+            case 'n': seq[ i ] = 'n'; break;
+            case 'N': seq[ i ] = 'N'; break;
+            default: break;
+        }
+    }
+}
+
+
+void complement_rna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Complements an RNA sequence in place including ambiguity coded */
+    /* nucleotides. Letter case is preserved, A/a is complemented to  */
+    /* U/u, T/t is treated like U/u, and chars without an entry below */
+    /* are left unchanged.                                            */
+
+    size_t i;
+
+    for ( i = 0; seq[ i ]; i++ )
+    {
+        switch ( seq[ i ] )
+        {
+            case 'a': seq[ i ] = 'u'; break;
+            case 'A': seq[ i ] = 'U'; break;
+            case 'c': seq[ i ] = 'g'; break;
+            case 'C': seq[ i ] = 'G'; break;
+            case 'g': seq[ i ] = 'c'; break;
+            case 'G': seq[ i ] = 'C'; break;
+            case 't': seq[ i ] = 'a'; break;
+            case 'u': seq[ i ] = 'a'; break;
+            case 'T': seq[ i ] = 'A'; break;
+            case 'U': seq[ i ] = 'A'; break;
+            case 'm': seq[ i ] = 'k'; break;
+            case 'M': seq[ i ] = 'K'; break;
+            case 'r': seq[ i ] = 'y'; break;
+            case 'R': seq[ i ] = 'Y'; break;
+            case 'w': seq[ i ] = 'w'; break;
+            case 'W': seq[ i ] = 'W'; break;
+            case 's': seq[ i ] = 's'; break;    /* S is its own complement; keep the case */
+            case 'S': seq[ i ] = 'S'; break;
+            case 'y': seq[ i ] = 'r'; break;
+            case 'Y': seq[ i ] = 'R'; break;
+            case 'k': seq[ i ] = 'm'; break;
+            case 'K': seq[ i ] = 'M'; break;
+            case 'b': seq[ i ] = 'v'; break;
+            case 'B': seq[ i ] = 'V'; break;
+            case 'd': seq[ i ] = 'h'; break;
+            case 'D': seq[ i ] = 'H'; break;
+            case 'h': seq[ i ] = 'd'; break;
+            case 'H': seq[ i ] = 'D'; break;
+            case 'v': seq[ i ] = 'b'; break;
+            case 'V': seq[ i ] = 'B'; break;
+            case 'n': seq[ i ] = 'n'; break;
+            case 'N': seq[ i ] = 'N'; break;
+            default: break;
+        }
+    }
+}
+
+
+void reverse( char *string )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Reverses a string in place. Strings of length 0 or 1 are */
+    /* returned unchanged.                                      */
+
+    char   c;
+    size_t len;
+    size_t i;
+    size_t j;
+
+    len = strlen( string );
+
+    /* Nothing to swap, and len - 1 would wrap around for an empty */
+    /* string because size_t is unsigned.                          */
+
+    if ( len < 2 ) {
+        return;
+    }
+
+    i = 0;
+    j = len - 1;
+
+    /* i < j (not <=): the indices stop when they meet, so j can */
+    /* never be decremented below zero.                          */
+
+    while ( i < j )
+    {
+        c = string[ i ];
+
+        string[ i ] = string[ j ];
+        string[ j ] = c;
+
+        i++;
+        j--;
+    }
+}
+
+
+void seq2nuc_simple( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Reduce a sequence in place to the alphabet A, T, C, G, U and N: */
+    /* those uppercase letters are kept, lowercase a, t, c, g and u    */
+    /* are uppercased, and every other char (lowercase n included)     */
+    /* becomes N.                                                      */
+
+    char *pt;
+
+    for ( pt = seq; *pt != '\0'; pt++ )
+    {
+        if ( strchr( "ATCGUN", *pt ) != NULL ) {
+            continue;                        /* already in the target alphabet */
+        }
+
+        if ( strchr( "atcgu", *pt ) != NULL ) {
+            *pt = ( char ) ( *pt - 'a' + 'A' );
+        } else {
+            *pt = 'N';
+        }
+    }
+}
+
+
+void dna2rna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Convert a DNA sequence to RNA in place: t becomes u and T */
+    /* becomes U; all other chars are left as they are.          */
+
+    char *pt;
+
+    for ( pt = seq; *pt != '\0'; pt++ )
+    {
+        if ( *pt == 't' ) {
+            *pt = 'u';
+        } else if ( *pt == 'T' ) {
+            *pt = 'U';
+        }
+    }
+}
+
+
+void rna2dna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Convert an RNA sequence to DNA in place: u becomes t and U */
+    /* becomes T; all other chars are left as they are.           */
+
+    char *pt;
+
+    for ( pt = seq; *pt != '\0'; pt++ )
+    {
+        if ( *pt == 'u' ) {
+            *pt = 't';
+        } else if ( *pt == 'U' ) {
+            *pt = 'T';
+        }
+    }
+}
+
+
+bool is_dna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Decide whether a sequence looks like DNA. The chars at indices   */
+    /* 0 to 100 (or fewer, if the sequence is shorter) are inspected;   */
+    /* the answer is FALSE as soon as one of them is not a DNA residue, */
+    /* an ambiguity code or one of the gap chars - ~ _ . and TRUE       */
+    /* otherwise (also for an empty sequence). U/u is not accepted.     */
+
+    static const char alphabet[] = "AaGgCcTtRrYyWwSsMmKkHhDdVvBbNn-~_.";
+    size_t            i;
+
+    for ( i = 0; seq[ i ] != '\0' && i <= 100; i++ )
+    {
+        if ( strchr( alphabet, seq[ i ] ) == NULL ) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+bool is_rna( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Decide whether a sequence looks like RNA. The chars at indices    */
+    /* 0 to 100 (or fewer, if the sequence is shorter) are inspected;    */
+    /* the answer is FALSE as soon as one of them is not an RNA residue, */
+    /* an ambiguity code or one of the gap chars - ~ _ . and TRUE        */
+    /* otherwise (also for an empty sequence). T/t is not accepted.      */
+
+    static const char alphabet[] = "AaGgCcUuRrYyWwSsMmKkHhDdVvBbNn-~_.";
+    size_t            i;
+
+    for ( i = 0; seq[ i ] != '\0' && i <= 100; i++ )
+    {
+        if ( strchr( alphabet, seq[ i ] ) == NULL ) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+bool is_protein( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Decide whether a sequence looks like protein. The chars at       */
+    /* indices 0 to 100 (or fewer, if the sequence is shorter) are      */
+    /* inspected; the answer is FALSE as soon as one of them is not in  */
+    /* the accepted residue alphabet below (amino acid letters, Z, B,   */
+    /* X, the * char and the gap chars - ~ _ .) and TRUE otherwise      */
+    /* (also for an empty sequence).                                    */
+
+    static const char alphabet[] = "KkRrHhDdEeSsTtNnQqAaVvIiLlMmFfYyWwCcGgPpZzBbXx*-~_.";
+    size_t            i;
+
+    for ( i = 0; seq[ i ] != '\0' && i <= 100; i++ )
+    {
+        if ( strchr( alphabet, seq[ i ] ) == NULL ) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
+char *seq_guess_type( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Guess the type of a given sequence, which is returned as a      */
+    /* pointer to one of the constant strings "DNA", "RNA" or          */
+    /* "PROTEIN". The tests run in that order, so a sequence that      */
+    /* passes several of them is reported as the first match.          */
+    /* A sequence matching none of them is fatal via die().            */
+    /*                                                                 */
+    /* The returned string is a string literal: it must not be         */
+    /* modified or freed by the caller. Nothing is allocated here.     */
+
+    char *type = NULL;
+
+    if ( is_dna( seq ) ) {
+        type = "DNA";
+    } else if ( is_rna( seq ) ) {
+        type = "RNA";
+    } else if ( is_protein( seq ) ) {
+        type = "PROTEIN";
+    } else {
+        die( "Could not guess sequence type.\n" );
+    }
+
+    return type;
+}
+
+
+bool contain_N( char *seq )
+{
+    /* Martin A. Hansen, May 2008 */
+
+    /* Report whether a sequence holds at least one N or n residue: */
+    /* TRUE if so, FALSE otherwise (also for an empty sequence).    */
+
+    if ( strpbrk( seq, "Nn" ) != NULL ) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+
+int oligo2bin( char *oligo )
+{
+    /* Martin A. Hansen, August 2004 */
+
+    /* Pack an oligo of at most 15 nucleotides into an integer, two bits */
+    /* per nucleotide with the first nucleotide in the highest used      */
+    /* bits: A and N -> 0, T and U -> 1, C -> 2, G -> 3 (either case).   */
+    /* Longer oligos and any other char are fatal via die().             */
+
+    char *pt;
+    int   code;
+    int   bin;
+
+    if ( strlen( oligo ) > 15 ) {
+        die( "Oligo will not fit in an integer." );
+    }
+
+    bin = 0;
+
+    for ( pt = oligo; *pt != '\0'; pt++ )
+    {
+        code = 0;
+
+        switch ( *pt )
+        {
+            case 'A': case 'a':
+            case 'N': case 'n': code = 0; break;
+            case 'T': case 't':
+            case 'U': case 'u': code = 1; break;
+            case 'C': case 'c': code = 2; break;
+            case 'G': case 'g': code = 3; break;
+            default: die( "Unrecognized nucleotide." );
+        }
+
+        bin = ( bin << 2 ) | code;
+    }
+
+    return bin;
+}
--- /dev/null
+#include "common.h"
+#include "ucsc.h"
+
+void bed_get_entry( FILE *fp, struct bed_entry3 *bed, int cols )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Read the next line from a stream, echo it to stdout and parse   */
+    /* its first 3 BED columns with bed_split(). Nothing is done when  */
+    /* no further line can be read.                                    */
+    /*                                                                 */
+    /* NOTE(review): the parsed values are only printed by bed_split;  */
+    /* the `bed` and `cols` arguments are not used and nothing is      */
+    /* handed back to the caller. This looks unfinished - confirm the  */
+    /* intended contract before relying on this function.              */
+
+    char                bed_buffer[ BED_BUFFER ];
+    struct bed_entry12 *bed12 = NULL;
+
+    ( void ) bed;
+    ( void ) cols;
+
+    if ( fgets( bed_buffer, sizeof( bed_buffer ), fp ) == NULL ) {
+        return;    /* a void function cannot return a value */
+    }
+
+    printf( "buffer: %s\n", bed_buffer );
+
+    MEM_GET( bed12 );
+
+    bed_split( bed_buffer, bed12, 3 );
+
+    /* The scratch entry is not handed to anyone, so release it again. */
+    /* With 3 columns the chromosome name is the only allocated field. */
+
+    mem_free( bed12->chr );
+    mem_free( bed12 );
+}
+
+
+void bed_split( char *string, struct bed_entry12 *bed, int cols )
+{
+    /* Parse the first `cols` tab separated fields of one BED line into  */
+    /* `bed`. `string` must be NUL terminated and contain a newline; a   */
+    /* field ends at a tab or, for the last field on the line, at the    */
+    /* newline. String fields are stored as newly allocated, NUL         */
+    /* terminated copies. A missing newline, too few fields, or a        */
+    /* numeric field without any digits is fatal via die().              */
+    /* Fields beyond the 12th are ignored.                               */
+
+    int     i;
+    bool    keep;         /* TRUE when ownership of field moved into bed */
+    char   *beg;          /* first char of the current field             */
+    char   *end;          /* the tab or newline that ends the field      */
+    char   *line_end;
+    char   *pt;           /* end pointer from the numeric conversions    */
+    char   *field;
+    size_t  field_len;
+
+    if ( cols < 1 ) {
+        die( "bed_split: no columns requested." );
+    }
+
+    if ( ( line_end = strchr( string, '\n' ) ) == NULL ) {
+        die( "bed_split: no newline found." );
+    }
+
+    beg = string;
+
+    for ( i = 0; i < cols; i++ )
+    {
+        if ( beg > line_end ) {
+            die( "bed_split: no tab found." );    /* fewer fields than requested */
+        }
+
+        if ( ( end = memchr( beg, '\t', line_end - beg ) ) == NULL ) {
+            end = line_end;
+        }
+
+        field_len = end - beg;
+
+        field = mem_get( field_len + 1 );    /* + 1 for the terminating NUL */
+
+        memcpy( field, beg, field_len );
+
+        field[ field_len ] = '\0';
+
+        keep = FALSE;
+        pt   = NULL;
+
+        switch ( i )
+        {
+            case  0: bed->chr        = field; keep = TRUE;   break;
+            case  1: bed->chr_beg    = strtod( field, &pt ); break;
+            case  2: bed->chr_end    = strtod( field, &pt ); break;
+            case  3: bed->q_id       = field; keep = TRUE;   break;
+            case  4: bed->score      = strtof( field, &pt ); break;
+            case  5: bed->strand     = field[ 0 ];           break;
+            case  6: bed->thick_beg  = strtod( field, &pt ); break;
+            case  7: bed->thick_end  = strtod( field, &pt ); break;
+            case  8: bed->itemrgb    = field; keep = TRUE;   break;
+            case  9: bed->blockcount = strtod( field, &pt ); break;
+            case 10: bed->blocksizes = field; keep = TRUE;   break;
+            case 11: bed->q_begs     = field; keep = TRUE;   break;
+            default: break;
+        }
+
+        /* The conversions leave pt at the start of the field when no */
+        /* number could be read; pt stays NULL for the other fields.  */
+
+        if ( pt == field ) {
+            die( "bed parse failed." );
+        }
+
+        if ( ! keep ) {
+            mem_free( field );
+        }
+
+        beg = end + 1;
+    }
+
+    /* NOTE(review): the prints below look like debug output left in; */
+    /* they are kept so the function's output does not change.        */
+
+    printf( "chr ->%s\n", bed->chr );
+    printf( "chr_beg->%u\n", bed->chr_beg );
+    printf( "chr_end->%u\n", bed->chr_end );
+}
+
+
--- /dev/null
+/*
+ Copyright (C) 2008, Martin A. Hansen
+
+ This program determines the repetitiveness of a genome by determining
+ the number of identical 15-mers for each position in the genome.
+
+ The output is a fixedStep file in the style of the phastCons files from
+ the UCSC Genome browser.
+
+ It is very fast and efficient using less than 8 Gb of memory to
+ complete the human genome in roughly 30 minutes.
+*/
+
+
+
+
+#include <stdio.h>
+#include <string.h>
+#include "common.h"
+#include "filesys.h"
+#include "fasta.h"
+
+/* Length in nucleotides of the oligos that are counted. */
+#define OLIGO_SIZE 15
+/* 2 ** ( 2 * OLIGO_SIZE ): one slot per possible oligo at two bits per */
+/* nucleotide. Used as the element count of the counting array.         */
+#define SIZE ( 1 << ( OLIGO_SIZE * 2 ) )
+
+/* Width used to shift a two bit code to the leftmost bits of an uint. */
+#define UINT_BITS 32
+/* Two bit nucleotide codes. There is no macro for A: the counting loop */
+/* adds no bits to bin for A, which leaves it encoded as 00.            */
+#define T 3 /* 11 on the rightmost two bits of bin. */
+#define C 1 /* 01 on the rightmost two bits of bin. */
+#define G 2 /* 10 on the rightmost two bits of bin. */
+
+/* Forward declarations for the functions of this program. */
+/* NOTE(review): oligo_count_output and fixedstep_put_entry are presumably */
+/* defined further down in this file - confirm.                            */
+uint mask_create( int oligo_size );
+uint *oligo_count( char *path );
+void oligo_count_output( char *path, uint *array );
+void fixedstep_put_entry( char *chr, int beg, int step_size, uint *block_array, int block_size );
+
+int main( int argc, char *argv[] )
+{
+    /* Entry point: count all oligos in the FASTA file given as first */
+    /* argument, then print the counts per sequence position. */
+
+    char *path;
+    uint *array;
+
+    /* Without an argument argv[ 1 ] is NULL and must not be used as a path. */
+    if ( argc < 2 )
+    {
+        fprintf( stderr, "Usage: %s <FASTA file>\n", argv[ 0 ] );
+
+        return 1;
+    }
+
+    path = argv[ 1 ];
+
+    array = oligo_count( path );          /* first pass: fill the count table */
+
+    oligo_count_output( path, array );    /* second pass: output counts per position */
+
+    return 0;
+}
+
+
+uint mask_create( int oligo_size )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Build a bit mask with one 11 bit pair per nucleotide, i.e. with the */
+    /* rightmost 2 * oligo_size bits set. */
+
+    uint mask      = 0;
+    uint remaining = oligo_size;
+
+    while ( remaining-- > 0 ) {
+        mask = ( mask << 2 ) | 3;    /* append another 11 pair on the right */
+    }
+
+    return mask;
+}
+
+
+uint *oligo_count( char *path )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Count the occurrence of all oligos of a fixed size in a FASTA file. */
+
+    /* Every window of OLIGO_SIZE consecutive A, C, G or T (either case) is */
+    /* counted twice: once as read and once as its reverse complement. Any */
+    /* other character restarts the window. Returns the count table of SIZE */
+    /* elements indexed by the 2-bit encoded oligo; it is not freed here. */
+
+    uint *array;
+    uint  i;
+    uint  mask;
+    uint  bin;        /* forward oligo, newest base in the two rightmost bits */
+    uint  bin_rc1;    /* reverse complement, newest base in the two leftmost bits */
+    uint  bin_rc2;    /* bin_rc1 shifted right so it can be used as a table index */
+    uint  j;          /* number of valid bases seen since the last restart */
+
+    /* Unsigned literals: 3 << 30 and 2 << 30 do not fit in a signed int. */
+    uint  A_rc = ( 3U << ( UINT_BITS - 2 ) );    /* 11 on the leftmost two bits an uint. */
+    uint  G_rc = ( 2U << ( UINT_BITS - 2 ) );    /* 10 on the leftmost two bits an uint. */
+    uint  C_rc = ( 1U << ( UINT_BITS - 2 ) );    /* 01 on the leftmost two bits an uint. */
+    struct seq_entry *entry;
+    FILE *fp;
+
+    array = mem_get_zero( sizeof( uint ) * SIZE );
+
+    mask = mask_create( OLIGO_SIZE );
+
+    MEM_GET( entry );
+
+    fp = read_open( path );
+
+    while ( ( fasta_get_entry( fp, entry ) ) )
+    {
+        fprintf( stderr, "Counting oligos in: %s ... ", entry->seq_name );
+
+        bin     = 0;
+        bin_rc1 = 0;
+        j       = 0;
+
+        for ( i = 0; entry->seq[ i ]; i++ )
+        {
+            /* Make room for the new base: on the right for the forward */
+            /* strand, on the left for the reverse complement. */
+            bin     <<= 2;
+            bin_rc1 >>= 2;
+
+            switch( entry->seq[ i ] )
+            {
+                case 'A': case 'a':           bin_rc1 |= A_rc; j++; break;
+                case 'T': case 't': bin |= T;                  j++; break;
+                case 'C': case 'c': bin |= C; bin_rc1 |= G_rc; j++; break;
+                case 'G': case 'g': bin |= G; bin_rc1 |= C_rc; j++; break;
+                default: bin = 0; bin_rc1 = 0; j = 0; break;
+            }
+
+            if ( j >= OLIGO_SIZE )
+            {
+                array[ ( bin & mask ) ]++;
+
+                bin_rc2 = bin_rc1;
+
+                /* Move the OLIGO_SIZE leftmost bit pairs down to the right. */
+                bin_rc2 >>= ( UINT_BITS - ( OLIGO_SIZE * 2 ) );
+
+                array[ ( bin_rc2 ) ]++;
+/*
+                printf( "\n" );
+                printf( "mask : %s\n", bits2string( mask ) );
+                printf( "bin : %s\n", bits2string( bin ) );
+                printf( "bin & mask: %s\n", bits2string( bin & mask ) );
+                printf( "bin_rc1 : %s\n", bits2string( bin_rc1 ) );
+                printf( "bin_rc2 : %s\n", bits2string( bin_rc2 ) );
+*/
+            }
+        }
+
+        fprintf( stderr, "done.\n" );
+    }
+
+    close_stream( fp );
+
+    fasta_free_entry( entry );
+
+    return array;
+}
+
+
+void oligo_count_output( char *path, uint *array )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Output oligo count for each sequence position. */
+
+    /* Reads the FASTA file a second time and looks up the count of the */
+    /* oligo ending at each position in array. Runs of consecutive oligo */
+    /* start positions with a count above 1 are printed as fixedStep blocks. */
+
+    struct seq_entry *entry;
+    FILE *fp;
+    uint  mask;
+    uint  i;
+    uint  j;            /* number of valid bases seen since the last restart */
+    uint  bin;
+    uint  count;        /* unsigned like the table elements it is read from */
+    uint *block;        /* counts of the block being collected */
+    uint  block_pos;    /* number of counts collected in block */
+    uint  block_beg;    /* sequence position of the first count in block */
+    uint  chr_pos;      /* start position of the current oligo */
+
+    mask = mask_create( OLIGO_SIZE );
+
+    MEM_GET( entry );
+
+    fp = read_open( path );
+
+    while ( ( fasta_get_entry( fp, entry ) ) )
+    {
+        fprintf( stderr, "Writing results for: %s ... ", entry->seq_name );
+
+        bin       = 0;
+        j         = 0;
+        block_pos = 0;
+        block_beg = 0;
+        block     = mem_get_zero( sizeof( uint ) * ( entry->seq_len + OLIGO_SIZE ) );
+
+        for ( i = 0; entry->seq[ i ]; i++ )
+        {
+            bin <<= 2;
+
+            switch( entry->seq[ i ] )
+            {
+                case 'A': case 'a':           j++; break;
+                case 'T': case 't': bin |= T; j++; break;
+                case 'C': case 'c': bin |= C; j++; break;
+                case 'G': case 'g': bin |= G; j++; break;
+                default: bin = 0; j = 0; break;
+            }
+
+            if ( j < OLIGO_SIZE ) {
+                continue;    /* no complete oligo ends here */
+            }
+
+            count = array[ ( bin & mask ) ];
+
+            if ( count <= 1 ) {
+                continue;    /* only repeated oligos are reported */
+            }
+
+            chr_pos = i - OLIGO_SIZE + 1;
+
+            /* A gap since the last reported position ends the current block. */
+            if ( block_pos > 0 && chr_pos > block_beg + block_pos )
+            {
+                fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
+
+                block_pos = 0;
+            }
+
+            /* Start a new block at this position. The original dropped the */
+            /* count that triggered the flush above; it now opens the new block. */
+            if ( block_pos == 0 )
+            {
+                ZERO( block );
+
+                block_beg = chr_pos;
+            }
+
+            block[ block_pos ] = count;
+
+            block_pos++;
+        }
+
+        if ( block_pos > 0 ) {
+            fixedstep_put_entry( entry->seq_name, block_beg, 1, block, block_pos );
+        }
+
+        /* Allocated once per entry, so it is released once per entry as well, */
+        /* also when no block was pending. */
+        mem_free( block );
+
+        fprintf( stderr, "done.\n" );
+    }
+
+    close_stream( fp );
+
+    fasta_free_entry( entry );
+}
+
+
+void fixedstep_put_entry( char *chr, int beg, int step_size, uint *block_array, int block_size )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Prints one fixedStep block: a declaration line followed by one value */
+    /* per line. Nothing is printed for an empty block. */
+
+    int n = 0;
+
+    if ( block_size <= 0 ) {
+        return;
+    }
+
+    /* beg is 0 based while the fixedStep format is 1 based. */
+    printf( "fixedStep chrom=%s start=%d step=%d\n", chr, beg + 1, step_size );
+
+    while ( n < block_size )
+    {
+        printf( "%u\n", block_array[ n ] );
+
+        n++;
+    }
+}
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+#include "fasta.h"
+
+int main( int argc, char *argv[] )
+{
+    /* Test driver: read the first "chr<TAB>beg<TAB>end" triple from the */
+    /* file given as first argument and print it. */
+
+    char *file;
+    FILE *fp;
+    char  chr[ 10 ];
+    int   chr_beg;
+    int   chr_end;
+
+    if ( argc < 2 )
+    {
+        fprintf( stderr, "Usage: %s <file>\n", argv[ 0 ] );
+
+        return 1;
+    }
+
+    file = argv[ 1 ];
+
+    fp = read_open( file );
+
+    /* fscanf() takes the stream first and needs the addresses of the */
+    /* integers; %9s keeps the name within chr[ 10 ]. */
+    if ( fscanf( fp, "%9s\t%d\t%d", chr, &chr_beg, &chr_end ) != 3 )
+    {
+        fprintf( stderr, "Could not parse chr, chr_beg and chr_end from: %s\n", file );
+
+        close_stream( fp );
+
+        return 1;
+    }
+
+    printf( "CHR: %s CHR_BEG: %d CHR_END: %d\n", chr, chr_beg, chr_end );
+
+    close_stream( fp );
+
+    return 0;
+}
+
+
+/*
+
+int main( int argc, char *argv[] )
+{
+ char *file;
+ FILE *fp;
+ struct seq_entry *entry = NULL;
+ int count;
+
+ count = 0;
+
+ file = argv[ 1 ];
+
+ fp = read_open( file );
+
+ while ( ( fasta_get_entry( fp, entry ) ) != FALSE )
+ {
+ printf( "seq_name: %s\n", entry->seq_name );
+
+// mem_free( entry->seq_name );
+// mem_free( entry->seq );
+// entry = NULL;
+
+ count++;
+ }
+
+ printf( "count: %d\n", count );
+
+ close_stream( fp );
+
+ return 0;
+}
+
+
+*/
--- /dev/null
+#include <stdio.h>
+
+#define BUFFER 100 * 1024
+
+int main( int argc, char *argv[] )
+{
+    /* Count the lines starting with '>' in the file given as first */
+    /* argument and print the result. Returns 1 if the file is missing */
+    /* from the command line or cannot be opened. */
+
+    int   count = 0;
+    char  line[ BUFFER ];
+    FILE *fp;
+
+    if ( argc < 2 )
+    {
+        fprintf( stderr, "Usage: %s <FASTA file>\n", argv[ 0 ] );
+
+        return 1;
+    }
+
+    if ( ( fp = fopen( argv[ 1 ], "r" ) ) == NULL )
+    {
+        perror( argv[ 1 ] );    /* say why instead of failing silently */
+
+        return 1;
+    }
+
+    while ( ( fgets( line, BUFFER, fp ) ) != NULL )
+    {
+        if ( line[ 0 ] == '>' ) {
+            count++;
+        }
+    }
+
+    fclose( fp );
+
+    printf( "count: %d\n", count );
+
+    return 0;
+}
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+#include "ucsc.h"
+
+
+int main( int argc, char *argv[] ) /* Test driver: opens the file named in argv[ 1 ] and calls bed_get_entry() once. */
+{
+ char *file;
+ FILE *fp;
+ struct bed_entry3 *bed; /* NOTE(review): passed to bed_get_entry() without being initialised - confirm who allocates it. */
+ int count; /* NOTE(review): set to 0 and printed, but never incremented in this function. */
+
+ file = argv[ 1 ]; /* NOTE(review): argc is not checked, so this is NULL when no argument is given. */
+
+ fp = read_open( file );
+
+ count = 0;
+
+ bed_get_entry( fp, bed, 3 ); /* presumably 3 is the number of BED columns to parse - TODO confirm */
+
+ printf( "Count: %d\n", count );
+
+ close_stream( fp );
+
+ return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include "common.h"
+
+int main()
+{
+    /* Test driver: look up one value in a sorted array using */
+    /* binary_search_array() and report whether it was found. */
+
+    int array[ 10 ] = { 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 };
+    int size        = 10;
+    int val         = 40;
+
+    if ( ! binary_search_array( array, size, val ) )
+    {
+        printf( "val->%d NOT found in array\n", val );
+
+        return 0;
+    }
+
+    printf( "val->%d found in array\n", val );
+
+    return 0;
+}
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+#include "hash.h"
+
+bool get_record( struct file_buffer *buffer, struct hash *record );
+void put_record( struct hash *record );
+
+int main( int argc, char *argv[] )
+{
+    /* Test driver: read records from the file given as first argument, */
+    /* write each record to stdout and report the record count on stderr. */
+
+    int   count;
+    char *file;
+    struct file_buffer *buffer = NULL;
+    struct hash *record        = NULL;
+
+    /* Without an argument argv[ 1 ] is NULL and must not be used as a path. */
+    if ( argc < 2 )
+    {
+        fprintf( stderr, "Usage: %s <file>\n", argv[ 0 ] );
+
+        return 1;
+    }
+
+    file = argv[ 1 ];
+
+    buffer = read_open_buffer( file );
+
+    /* NOTE(review): the same hash is reused for every record and is not */
+    /* emptied in between - confirm that hash_add() replaces existing keys. */
+    record = hash_new( 5 );
+
+    count = 0;
+
+    while ( ( get_record( buffer, record ) ) != FALSE )
+    {
+        put_record( record );
+
+        count++;
+    }
+
+    fprintf( stderr, "Count: %d\n", count );
+
+    hash_destroy( record );
+
+    buffer_destroy( buffer );
+
+    return 0;
+}
+
+
+bool get_record( struct file_buffer *buffer, struct hash *record )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Get next record from the stream. */
+
+    /* Lines of the form "key: value" are added to record until the */
+    /* delimiter line "---" is read, at which point TRUE is returned. */
+    /* FALSE is returned when the input runs out before a delimiter. */
+    /* Dies on a line without ": " or with a key that does not fit in key. */
+
+    char *line = NULL;
+    char *val  = NULL;
+    char  key[ 256 ];
+    int   len;
+    int   val_len;
+    int   i;
+    bool  key_ok;
+
+    while ( ( line = buffer_gets( buffer ) ) )
+    {
+        if ( strcmp( line, "---\n" ) == 0 ) {
+            return TRUE;
+        }
+
+        key_ok = FALSE;
+
+        len = strlen( line );
+
+        for ( i = 0; i < len; i++ )
+        {
+            if ( i < len - 1 && line[ i ] == ':' && line[ i + 1 ] == ' ' )
+            {
+                key_ok = TRUE;
+
+                key[ i ] = '\0';
+
+                i += 2;    /* skip ": " so that i points at the value */
+
+                break;
+            }
+
+            /* Leave room for the terminating '\0' written above. */
+            if ( i >= ( int ) sizeof( key ) - 1 ) {
+                die( "Key too long." );
+            }
+
+            key[ i ] = line[ i ];
+        }
+
+        if ( ! key_ok ) {
+            die( "Could not locate key." );
+        }
+
+        /* The value is the rest of the line without the trailing newline. */
+        val_len = len - i;
+
+        if ( val_len > 0 && line[ len - 1 ] == '\n' ) {
+            val_len--;
+        }
+
+        val = mem_get( val_len + 1 );
+
+        memcpy( val, &line[ i ], val_len );
+
+        /* The original wrote the terminator one byte past the buffer and */
+        /* left the last byte inside it unset. */
+        val[ val_len ] = '\0';
+
+        hash_add( record, key, val );
+    }
+
+    return FALSE;
+}
+
+
+void put_record( struct hash *record )
+{
+    /* Martin A. Hansen, June 2008 */
+
+    /* Prints every key/value pair in the hash as "key: value", walking */
+    /* the table slot by slot, and ends the record with a "---" line. */
+
+    struct hash_elem *elem;
+    int slot = 0;
+
+    while ( slot < record->table_size )
+    {
+        elem = record->table[ slot ];
+
+        while ( elem != NULL )
+        {
+            printf( "%s: %s\n", ( char * ) elem->key, ( char * ) elem->val );
+
+            elem = elem->next;
+        }
+
+        slot++;
+    }
+
+    printf( "---\n" );
+}
+
+
+
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+#include "list.h"
+#include "fasta.h"
+
+int main()
+{
+    /* Test driver: read all entries from a fixed FASTA file into a list */
+    /* and write them out again. */
+
+    struct list *entries = NULL;
+    FILE *fp             = read_open( "/Users/m.hansen/test.fna" );
+
+    fasta_get_entries( fp, &entries );
+    fasta_put_entries( entries );
+
+    close_stream( fp );
+
+    return 0;
+}
+
--- /dev/null
+#include "common.h"
+#include "filesys.h"
+
+
+int main( int argc, char *argv[] )
+{
+    /* Test driver: print every line of the file given as first argument */
+    /* as returned by buffer_gets(). */
+
+    struct file_buffer *buffer;
+    char *line;
+    char c;    /* only used by the disabled buffer_getc() test below */
+
+    /* Without an argument argv[ 1 ] is NULL and must not be used as a path. */
+    if ( argc < 2 )
+    {
+        fprintf( stderr, "Usage: %s <file>\n", argv[ 0 ] );
+
+        return 1;
+    }
+
+    buffer = read_open_buffer( argv[ 1 ] );
+
+    while ( ( line = buffer_gets( buffer ) ) ) {
+        printf( "LINE->%s<-", line );
+    }
+
+/*
+    while ( ( c = buffer_getc( buffer ) ) )
+    {
+        if ( c == '\n' ) {
+            printf( "CHAR->\\n\n" );
+        } else {
+            printf( "CHAR->%c\n", c );
+        }
+    }
+*/
+    buffer_destroy( buffer );
+
+    return 0;
+}
+
+
+
+/*
+#define SUBSTR_SIZE 15
+
+int main()
+{
+ char *string="foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar";
+ char substr[ SUBSTR_SIZE + 1 ];
+ int i;
+ int j;
+
+ for ( i = 0; i < strlen( string ) - SUBSTR_SIZE + 1; i++ )
+ {
+ for ( j = 0; j < SUBSTR_SIZE; j++ ) {
+ substr[ j ] = string[ i + j ];
+ }
+
+ substr[ j ] = '\0';
+
+ printf( "substr->%s\n", substr );
+ }gg
+
+ return 0;
+}
+
+*/
--- /dev/null
+#include <stdio.h>
+#include "common.h"
+#include "hash.h"
+
+
+int main()
+{
+    /* Test driver: add ( 1 << pot ) generated keys to a hash and print */
+    /* its collision statistics. */
+
+    struct hash *my_hash;
+    int  pot;
+    int  i;
+    char dummy[ 32 ];    /* room for "dummy_", any int and the terminator */
+
+    pot = 16;
+
+    my_hash = hash_new( pot );
+
+    for ( i = 1; i <= ( 1 << pot ); i++ )
+    {
+        /* The original wrote the key through an uninitialised pointer. */
+        snprintf( dummy, sizeof( dummy ), "dummy_%d", i );
+
+        /* NOTE(review): one buffer is reused for all keys, as in the */
+        /* original - this relies on hash_add() copying the key; confirm. */
+        hash_add( my_hash, dummy, "FOO" );
+    }
+
+    hash_collision_stats( my_hash );
+
+//    if ( ( val = ( char * ) hash_get( my_hash, key ) ) != NULL ) {
+//        printf( "Key: %s, Val: %s\n", key, val );
+//    } else {
+//        printf( "Key: %s, Val: Not Found\n", key );
+//    }
+
+    hash_destroy( my_hash );
+
+    return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include "common.h"
+#include "list.h"
+
+int main()
+{
+    /* Test driver: add two integers to a list and look one of them up. */
+
+//    struct list *list_pt;
+//
+//    char *string1 = "Hello";
+//    char *string2 = "World";
+//
+//    list_add( &list_pt, string1 );
+//    list_add( &list_pt, string2 );
+//
+//    if ( list_exists( list_pt, "World" ) ) {
+//        printf( "Found\n" );
+//    } else {
+//        printf( "Not Found\n" );
+//    }
+//
+//    list_free( &list_pt );
+
+    /* Start from an empty list: the original passed the address of an */
+    /* uninitialised pointer to list_add_int(). */
+    struct list_int *list_int_pt = NULL;
+
+    int i = 4;
+    int j = 12;
+
+    list_add_int( &list_int_pt, i );
+    list_add_int( &list_int_pt, j );
+
+    if ( list_exists_int( list_int_pt, j ) ) {
+        printf( "Found\n" );
+    } else {
+        printf( "Not Found\n" );
+    }
+
+    /* NOTE(review): list_free() is also used for struct list in the */
+    /* disabled code above - confirm that it accepts a struct list_int too. */
+    list_free( &list_int_pt );
+
+    return 0;
+}
+
+
--- /dev/null
+#include <stdio.h>
+#include "common.h"
+#include "seq.h"
+
+
+int main()
+{
+    /* Test driver: encode a fixed oligo of 15 G's with oligo2bin() and */
+    /* print the resulting number. */
+
+    /*            123456789012345 */
+    char *word = "GGGGGGGGGGGGGGG";
+    int   bin  = oligo2bin( word );
+
+    printf( "bin->%d\n", bin );
+
+    return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include "common.h"
+#include "seq.h"
+
+int main() /* Test driver: prints the guessed type of a fixed sequence, then the sequence before and after revcomp_nuc(). */
+{
+ char seq[] = "ACGACATCGGACTGACactgactgacatgcactg"; /* an array, not a string literal, so it can be modified */
+
+ printf( "seq type: %s\n", seq_guess_type( seq ) );
+
+ printf( "before revcomp: %s\n", seq );
+
+ revcomp_nuc( seq ); /* the result is read back from seq below, so the call is expected to work in place */
+
+ printf( "after revcomp: %s\n", seq );
+
+ return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include "common.h"
+#include "list.h"
+
+
+int main()
+{
+    /* Test driver: remove the line ending from a fixed string, split it */
+    /* on tabs and print the resulting fields. */
+
+    char string[] = "FOO\tBAR\tFOOBAR\n";
+
+    /* Start from an empty list: the original passed the address of an */
+    /* uninitialised pointer to split(). */
+    struct list *fields = NULL;
+
+    chomp( string );
+
+    split( string, '\t', &fields );
+
+    list_print( fields );
+
+    return 0;
+}
--- /dev/null
+package Maasha::Align;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines to perform and print pairwise and multiple alignments
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use IPC::Open2;
+use Maasha::Common;
+use Maasha::Fasta;
+use Maasha::Calc;
+use Maasha::Seq;
+use vars qw ( @ISA @EXPORT );
+
+use constant {
+ HEAD => 0,
+ SEQ => 1,
+};
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub align
+{
+    # Martin A. Hansen, August 2007.
+
+    # Aligns a given list of FASTA entries and returns a
+    # list of aligned sequences as FASTA entries.
+    # (currently uses Muscle, but other align engines can
+    # be used with a bit of tweaking).
+
+    my ( $entries,   # Fasta entries
+         $args,      # additional alignment program specific arguments - OPTIONAL
+       ) = @_;
+
+    # Returns a list (a list reference in scalar context).
+
+    my ( @aligned_entries, $muscle_args );
+
+    $muscle_args  = "-quiet";
+
+    # A blank must separate the extra arguments from "-quiet";
+    # the original glued them together as e.g. "-quiet-maxiters 1".
+    $muscle_args .= " $args" if $args;
+
+    @aligned_entries = &align_muscle( $entries, $muscle_args );
+
+    return wantarray ? @aligned_entries : \@aligned_entries;
+}
+
+
+sub align_muscle
+{
+    # Martin A. Hansen, June 2007.
+
+    # Runs Muscle as a child process, writes the given FASTA
+    # entries to it and reads the aligned entries back.
+
+    my ( $entries,   # FASTA entries
+         $args,      # additional Muscle arguments - OPTIONAL
+       ) = @_;
+
+    # Returns a list (a list reference in scalar context).
+
+    my ( $pid, $fh_in, $fh_out, $cmd, $entry, @aligned_entries );
+
+    $cmd = $args ? "muscle $args" : "muscle";
+
+    $pid = open2( $fh_out, $fh_in, $cmd );
+
+    foreach $entry ( @{ $entries } ) {
+        &Maasha::Fasta::put_entry( $entry, $fh_in );
+    }
+
+    # Closing the input tells Muscle that all entries have been sent.
+    close $fh_in;
+
+    while ( $entry = &Maasha::Fasta::get_entry( $fh_out ) ) {
+        push @aligned_entries, $entry;
+    }
+
+    close $fh_out;
+
+    waitpid $pid, 0;
+
+    return wantarray ? @aligned_entries : \@aligned_entries;
+}
+
+
+sub align_print_pairwise
+{
+    # Martin A. Hansen, June 2007.
+
+    # Prints a pairwise alignment as five lines: the ruler of the
+    # first entry, the first entry, the pins, the second entry
+    # and the ruler of the second entry.
+
+    my ( $entry1,   # first entry
+         $entry2,   # second entry
+         $fh,       # output filehandle - OPTIONAL
+         $wrap,     # wrap width        - OPTIONAL
+       ) = @_;
+
+    # returns nothing
+
+    # Scalar assignments, so each helper returns a [ HEAD, SEQ ] reference.
+    my $top_ruler    = &align_ruler( $entry1, 1 );
+    my $bottom_ruler = &align_ruler( $entry2, 1 );
+    my $pins         = &align_pins( $entry1, $entry2 );
+
+    &align_print( [ $top_ruler, $entry1, $pins, $entry2, $bottom_ruler ], $fh, $wrap );
+}
+
+
+sub align_print_multi
+{
+    # Martin A. Hansen, June 2007.
+
+    # Prints a given multiple alignment in FASTA format,
+    # by default with a ruler on top and a consensus line
+    # at the bottom. NB: the ruler and the consensus are
+    # inserted into the caller's list of entries.
+
+    my ( $entries,    # list of aligned FASTA entries
+         $fh,         # output filehandle   - OPTIONAL
+         $wrap,       # wrap width          - OPTIONAL
+         $no_ruler,   # omit ruler flag     - OPTIONAL
+         $no_cons,    # omit consensus flag - OPTIONAL
+       ) = @_;
+
+    # returns nothing
+
+    my ( $ruler, $consensus );
+
+    # Both are calculated before the list is modified, so the
+    # ruler never takes part in the consensus calculation.
+    $ruler     = &align_ruler( $entries->[ 0 ] ) if not $no_ruler;
+    $consensus = &align_consensus( $entries )    if not $no_cons;
+
+    unshift @{ $entries }, $ruler if not $no_ruler;
+
+    # The original pushed the consensus unconditionally, which added
+    # an undefined entry to the list when $no_cons was set.
+    push @{ $entries }, $consensus if not $no_cons;
+
+    &align_print( $entries, $fh, $wrap );
+}
+
+
+sub align_print
+{
+    # Martin A. Hansen, June 2007.
+
+    # Prints an alignment block by block with one entry per line:
+    # the header, padding up to the longest header plus three
+    # blanks, and the sequence.
+
+    my ( $entries,   # Alignment as FASTA entries
+         $fh,        # output filehandle - OPTIONAL
+         $wrap,      # wrap alignment    - OPTIONAL
+       ) = @_;
+
+    # returns nothing
+
+    my ( $widest, $head_len, $blocks, $block, $entry, @fields );
+
+    $widest = 0;
+
+    foreach $entry ( @{ $entries } )
+    {
+        $head_len = length $entry->[ HEAD ];
+        $widest   = $head_len if $head_len > $widest;
+    }
+
+    $blocks = &align_wrap( $entries, $wrap );
+
+    foreach $block ( @{ $blocks } )
+    {
+        foreach $entry ( @{ $block } )
+        {
+            # These headers only mark the line type and are not printed.
+            $entry->[ HEAD ] =~ s/stats|ruler|consensus//;
+
+            @fields = ( $entry->[ HEAD ], " " x ( $widest + 3 - length $entry->[ HEAD ] ), $entry->[ SEQ ], "\n" );
+
+            if ( $fh ) {
+                print $fh @fields;
+            } else {
+                print @fields;
+            }
+        }
+    }
+}
+
+
+sub align_wrap
+{
+    # Martin A. Hansen, October 2005.
+
+    # Cuts a set of fasta entries into blocks of a given width.
+    # Each block holds one [ HEAD, SEQ chunk ] pair per entry.
+
+    my ( $entries,   # list of fasta_entries
+         $wrap,      # wrap width - OPTIONAL
+       ) = @_;
+
+    # returns AoA
+
+    my ( $offset, $entry, $chunk, @lines, @blocks );
+
+    $wrap ||= 999999999;
+
+    for ( $offset = 0; $offset < length $entries->[ 0 ]->[ SEQ ]; $offset += $wrap )
+    {
+        @lines = ();
+
+        foreach $entry ( @{ $entries } )
+        {
+            $chunk = substr $entry->[ SEQ ], $offset, $wrap;
+
+            if ( $entry->[ HEAD ] eq "ruler" )
+            {
+                # Digits at either edge of the chunk are blanked out.
+                $chunk =~ s/^(\d+)/' ' x length $1/e;
+                $chunk =~ s/(\d+)$/' ' x length $1/e;
+
+                push @lines, [ "ruler", $chunk ];
+            }
+            else
+            {
+                push @lines, [ $entry->[ HEAD ], $chunk ];
+            }
+        }
+
+        push @blocks, [ @lines ];
+    }
+
+    return wantarray ? @blocks : \@blocks;
+}
+
+
+sub align_pins
+{
+    # Martin A. Hansen, June 2007.
+
+    # Builds the line of pins shown between two aligned FASTA
+    # entries. Identical positions get the residue itself when
+    # the BLOSUM matrix is in use and "|" otherwise, differing
+    # positions with a positive BLOSUM score get "+", and all
+    # other positions get a blank.
+
+    my ( $entry1,   # first entry
+         $entry2,   # second entry
+         $type,     # residue type - OPTIONAL
+       ) = @_;
+
+    # returns tuple
+
+    my ( $blosum, $i, $char1, $char2, $pins );
+
+    $type ||= &Maasha::Seq::seq_guess_type( $entry1->[ SEQ ] );
+
+    # The matrix is only read for sequences typed as protein.
+    $blosum = &blosum_read() if $type =~ /protein/;
+
+    foreach $i ( 0 .. length( $entry1->[ SEQ ] ) - 1 )
+    {
+        $char1 = substr $entry1->[ SEQ ], $i, 1;
+        $char2 = substr $entry2->[ SEQ ], $i, 1;
+
+        if ( $char1 eq $char2 ) {
+            $pins .= $blosum ? $char1 : "|";
+        } elsif ( $blosum and $blosum->{ $char1 }->{ $char2 } > 0 ) {
+            $pins .= "+";
+        } else {
+            $pins .= " ";
+        }
+    }
+
+    return wantarray ? ( "consensus", $pins ) : [ "consensus", $pins ];
+}
+
+
+sub align_ruler
+{
+ # Martin A. Hansen, February 2007;
+
+ # Generates a ruler for a given FASTA entry (with indels): "|" plus the position at every 100, ":" at every 50, "." at every 10 and a blank elsewhere.
+
+ my ( $entry, # FASTA entry
+ $count_gaps, # flag for counting indels in pairwise alignments.
+ ) = @_;
+
+ # Returns the tuple ( "ruler", $tics ) in list context and a reference to it in scalar context.
+
+ my ( $i, $char, $skip, $count, $gap, $tics );
+
+ $char = ""; # stays empty unless $count_gaps is set, so no gaps are counted in that case
+ $gap = 0; # number of "-" seen so far
+ $i = 1; # 1 based column in the alignment
+
+ while ( $i <= length $entry->[ SEQ ] )
+ {
+ $char = substr( $entry->[ SEQ ], $i - 1, 1 ) if $count_gaps;
+
+ $gap++ if $char eq "-";
+
+ if ( $skip ) # this column is already taken by a number written after a "|"
+ {
+ $skip--;
+ }
+ else
+ {
+ $count = $i - $gap; # position counted without the gaps seen so far
+ $count = 1 if $char eq "-"; # 1 matches none of the tests below, so a gap column gets a blank
+
+ if ( $count % 100 == 0 )
+ {
+ if ( $count + length( $count ) >= length $entry->[ SEQ ] ) # the number would reach the end of the sequence
+ {
+ $tics .= "|";
+ }
+ else
+ {
+ $tics .= "|" . $count;
+ $skip = length $count; # the digits take up the following columns
+ }
+ }
+ elsif ( $count % 50 == 0 ) {
+ $tics .= ":";
+ } elsif ( $count % 10 == 0 ) {
+ $tics .= ".";
+ } else {
+ $tics .= " ";
+ }
+ }
+
+ $i++;
+ }
+
+ return wantarray ? ( "ruler", $tics ) : [ "ruler", $tics ];
+}
+
+
+sub align_consensus
+{
+    # Martin A. Hansen, June 2006.
+
+    # Derives a consensus sequence from an alignment given as a
+    # list of FASTA entries, using the per column data from
+    # seqlogo_calc. A column contributes the residue in its last
+    # data element if that residue's score, as a percentage of
+    # the maximum number of bits, reaches the minimum similarity;
+    # otherwise it contributes "-".
+
+    my ( $entries,   # list of aligned FASTA entries
+         $type,      # residue type       - OPTIONAL
+         $min_sim,   # minimum similarity - OPTIONAL
+       ) = @_;
+
+    # Returns tuple
+
+    my ( $bit_max, $data, $pos, $char, $score, $residue, $consensus );
+
+    $type    ||= &Maasha::Seq::seq_guess_type( $entries->[ 0 ]->[ SEQ ] );
+    $min_sim ||= 50;
+
+    $bit_max = $type =~ /protein/ ? 4 : 2;
+
+    $data = &Maasha::Seq::seqlogo_calc( $bit_max, $entries );
+
+    foreach $pos ( @{ $data } )
+    {
+        $residue = "-";
+
+        if ( $pos->[ -1 ] )
+        {
+            ( $char, $score ) = @{ $pos->[ -1 ] };
+
+            $residue = $char if ( $score / $bit_max ) * 100 >= $min_sim;
+        }
+
+        $consensus->[ SEQ ] .= $residue;
+    }
+
+    $consensus->[ HEAD ] = "Consensus: $min_sim%";
+
+    return wantarray ? @{ $consensus } : $consensus;
+}
+
+
+sub align_sim_global
+{
+    # Martin A. Hansen, June 2007.
+
+    # Calculate the global similarity of two aligned entries
+    # The similarity is calculated as the number of matching
+    # residues divided by the length of the shortest sequence,
+    # where the length of a sequence is taken after removing
+    # leading and trailing gaps.
+
+    my ( $entry1,   # first aligned entry
+         $entry2,   # second aligned entry
+       ) = @_;
+
+    # returns float (percentage with two decimals)
+
+    my ( $seq1, $seq2, $len1, $len2, $aln_len, $i, $char1, $match_tot, $min, $sim );
+
+    $seq1 = $entry1->[ SEQ ];
+    $seq2 = $entry2->[ SEQ ];
+
+    # Stripped copies are used for the lengths only.
+    $seq1 =~ s/^-*//;
+    $seq2 =~ s/^-*//;
+    $seq1 =~ s/-*$//;
+    $seq2 =~ s/-*$//;
+
+    $len1 = length $seq1;
+    $len2 = length $seq2;
+
+    return 0 if $len1 == 0 or $len2 == 0;
+
+    $match_tot = 0;
+
+    # Matches are counted over all columns of the alignment. The
+    # original stopped after $len1 columns, which cut the comparison
+    # short whenever the first entry had leading or trailing gaps.
+    $aln_len = &Maasha::Calc::min( length $entry1->[ SEQ ], length $entry2->[ SEQ ] );
+
+    for ( $i = 0; $i < $aln_len; $i++ )
+    {
+        $char1 = substr( $entry1->[ SEQ ], $i, 1 );
+
+        next if $char1 eq "-";   # a gap is not a residue, so gap against gap is no match
+
+        $match_tot++ if $char1 eq substr( $entry2->[ SEQ ], $i, 1 );
+    }
+
+    $min = &Maasha::Calc::min( $len1, $len2 );
+
+    $sim = sprintf( "%.2f", ( $match_tot / $min ) * 100 );
+
+    return $sim;
+}
+
+
+sub align_tile
+{
+ # Martin A. Hansen, February 2008.
+
+ # Tile a list of query sequences against a reference sequence,
+ # using pairwise alignments of each query and of its reverse complement. The result is returned as a list of
+ # aligned FASTA entries with the reference entry first. NB: the query entries are modified in place.
+
+ my ( $ref_entry, # reference entry as [ HEAD, SEQ ] tuple
+ $q_entries, # list of [ HEAD, SEQ ] tuples
+ $args, # argument hash - the keys "identity" and "supress_indels" are read
+ ) = @_;
+
+ # Returns a list (a list reference in scalar context).
+
+ my ( $entry, $seq1, $seq2, $type, $align1, $align2, $sim1, $sim2, $gaps, @entries );
+
+ $args->{ "identity" } ||= 70; # default minimum similarity; this is written into the caller's hash
+
+ foreach $entry ( @{ $q_entries } )
+ {
+ $seq1 = $entry->[ SEQ ];
+
+ $type = &Maasha::Seq::seq_guess_type( $seq1 );
+
+ if ( $type eq "rna" ) {
+ $seq2 = &Maasha::Seq::rna_revcomp( $seq1 );
+ } elsif ( $type eq "dna" ) {
+ $seq2 = &Maasha::Seq::dna_revcomp( $seq1 );
+ } else {
+ &Maasha::Common::error( qq(Bad sequence type->$type) );
+ }
+
+ $align1 = &Maasha::Align::align_muscle( [ $ref_entry, [ $entry->[ HEAD ] . "_+", $seq1 ] ], "-quiet -maxiters 1" ); # query as given
+ $align2 = &Maasha::Align::align_muscle( [ $ref_entry, [ $entry->[ HEAD ] . "_-", $seq2 ] ], "-quiet -maxiters 1" ); # reverse complement of the query
+
+ if ( $args->{ "supress_indels" } )
+ {
+ &align_supress_indels( $align1 );
+ &align_supress_indels( $align2 );
+ }
+
+ $sim1 = &Maasha::Align::align_sim_global( $align1->[ 0 ], $align1->[ 1 ] );
+ $sim2 = &Maasha::Align::align_sim_global( $align2->[ 0 ], $align2->[ 1 ] );
+
+ if ( $sim1 < $args->{ "identity" } and $sim2 < $args->{ "identity" } )
+ {
+ # do nothing - neither orientation is similar enough, so the query is left out
+ }
+ elsif ( $sim1 > $sim2 ) # on a tie the else branch below is taken
+ {
+ $gaps = $align1->[ 0 ]->[ SEQ ] =~ tr/-//; # number of gaps in the aligned reference
+
+ $align1->[ 1 ]->[ SEQ ] =~ s/-{$gaps}$// if $gaps; # removes that many "-" from the end of the aligned query, if present
+
+ $entry->[ HEAD ] = "$align1->[ 1 ]->[ HEAD ]_$sim1";
+ $entry->[ SEQ ] = $align1->[ 1 ]->[ SEQ ];
+
+ push @entries, $entry;
+ }
+ else
+ {
+ $gaps = $align2->[ 0 ]->[ SEQ ] =~ tr/-//; # same as above, for the reverse complement
+
+ $align2->[ 1 ]->[ SEQ ] =~ s/-{$gaps}$// if $gaps;
+
+ $entry->[ HEAD ] = "$align2->[ 1 ]->[ HEAD ]_$sim2";
+ $entry->[ SEQ ] = $align2->[ 1 ]->[ SEQ ];
+
+ push @entries, $entry;
+ }
+ }
+
+ @entries = sort { $b->[ SEQ ] cmp $a->[ SEQ ] } @entries; # descending string order of the aligned sequences
+
+ unshift @entries, $ref_entry;
+
+ return wantarray ? @entries : \@entries;
+}
+
+
+sub align_supress_indels
+{
+    # Martin A. Hansen, June 2008.
+
+    # Removes the gap columns of the first sequence of a pairwise
+    # alignment from both sequences. The alignment is modified in
+    # place, and left untouched if the first sequence has no gaps.
+
+    my ( $align,   # pairwise alignment
+       ) = @_;
+
+    # Returns nothing
+
+    my ( $ref_seq, $kept, $pos );
+
+    $ref_seq = $align->[ 0 ]->[ SEQ ];
+
+    if ( $ref_seq =~ tr/-// )
+    {
+        foreach $pos ( 0 .. length( $ref_seq ) - 1 )
+        {
+            next if substr( $ref_seq, $pos, 1 ) eq '-';
+
+            $kept .= substr( $align->[ 1 ]->[ SEQ ], $pos, 1 );
+        }
+
+        $align->[ 0 ]->[ SEQ ] =~ tr/-//d;
+        $align->[ 1 ]->[ SEQ ] = $kept;
+    }
+}
+
+
+sub align_invert
+{
+    # Martin A. Hansen, February 2008.
+
+    # Inverts an alignment so that only residues differing from
+    # the first sequence (the reference sequence) stand out.
+    # Gaps are first rewritten as _ in all sequences. Residues
+    # matching the reference (ignoring case) are then either
+    # lowercased (soft) or replaced with -.
+
+    my ( $entries,   # list of FASTA entries.
+         $soft,      # flag: lowercase matches instead of replacing them
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $ref, $entry, $pos, $ref_char, @others );
+
+    foreach $entry ( @{ $entries } ) {
+        $entry->[ SEQ ] =~ tr/-/_/;
+    }
+
+    ( $ref, @others ) = @{ $entries };
+
+    foreach $pos ( 0 .. length( $ref->[ SEQ ] ) - 1 )
+    {
+        $ref_char = uc( substr( $ref->[ SEQ ], $pos, 1 ) );
+
+        foreach $entry ( @others )
+        {
+            next if $ref_char ne uc( substr( $entry->[ SEQ ], $pos, 1 ) );
+
+            substr( $entry->[ SEQ ], $pos, 1, $soft ? lc( $ref_char ) : "-" );
+        }
+    }
+}
+
+
+{
+    # Parsed matrix, kept between calls: the DATA filehandle can only
+    # be read once, so without this a second call found no lines.
+    my $blosum_cache;
+
+    sub blosum_read
+    {
+        # Martin A. Hansen, January 2006.
+
+        # this routine parses the BLOSUM62 matrix,
+        # which is located in the __DATA__ section.
+        # Blank lines and lines starting with # are skipped, the
+        # first remaining line holds the column residues, and each
+        # following line holds a row residue and its scores.
+
+        # returns HoH: $HoH->{ row residue }->{ column residue } = score
+
+        my ( @lines, @chars, $i, $c, @list );
+
+        if ( not $blosum_cache )
+        {
+            @lines = grep { $_ !~ /^$|^#/ } <DATA>;
+
+            # The header line starts with whitespace, so $chars[ 0 ] is
+            # empty and the residues line up with the score columns.
+            @chars = split /\s+/, $lines[ 0 ];
+
+            $i = 1;
+
+            while ( $lines[ $i ] )
+            {
+                last if $lines[ $i ] =~ /^__END__/;
+
+                @list = split /\s+/, $lines[ $i ];
+
+                for ( $c = 1; $c < @list; $c++ ) {
+                    $blosum_cache->{ $list[ 0 ] }->{ $chars[ $c ] } = $list[ $c ];
+                }
+
+                $i++;
+            }
+        }
+
+        return wantarray ? %{ $blosum_cache || {} } : $blosum_cache;
+    }
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__DATA__
+
+
+# Matrix made by matblas from blosum62.iij
+# * column uses minimum score
+# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
+# Blocks Database = /data/blocks_5.0/blocks.dat
+# Cluster Percentage: >= 62
+# Entropy = 0.6979, Expected = -0.5209
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
+A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
+R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
+N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
+D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
+C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
+Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
+E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
+G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
+H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
+I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
+L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
+K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
+M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
+F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
+S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
+T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
+Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
+V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
+B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
+Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
+X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
+* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
+
+
+__END__
--- /dev/null
+package Align;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for aligning two sequences by recursively chaining matches (shared subsequences).
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Storable qw( dclone );
+use IPC::Open2;
+use Maasha::Calc;
+use Maasha::Seq;
+use vars qw ( @ISA @EXPORT );
+
+use constant {
+ Q_BEG => 0,
+ Q_END => 1,
+ S_BEG => 2,
+ S_END => 3,
+ LEN => 4,
+ SCORE => 5,
+ HEAD => 0,
+ SEQ => 1,
+};
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub align_two_seq
+{
+ # Martin A. Hansen, August 2006.
+
+ # Generates an alignment by chaining matches, which are subsequences
+ # shared between two sequences. The routine functions by considering
+ # only matches within a given search space. If no matches are given
+ # these will be generated, if long matches are found these will be
+ # included in the alignment, otherwise matches will be included depending
+ # on a calculated score. New search spaces spanning the spaces between
+ # matches and the search space boundaries will be cast and recursed into.
+
+ my ( $q_seq, # sequence 1 ref
+ $s_seq, # sequence 2 ref
+ $matches, # list of matches
+ $q_min, # q sequence start position
+ $q_max, # q sequence stop position
+ $s_min, # s sequence start position
+ $s_max, # s sequence stop position
+ $args, # argument hash - the keys "long_matches" and "alph_len" are given defaults below
+ ) = @_;
+
+ # returns a chain of matches that can be chained into an alignment (a list reference in scalar context)
+
+ $matches ||= [];
+ $q_min ||= 0;
+ $s_min ||= 0;
+ $q_max ||= length( ${ $q_seq } ) - 1; # NOTE(review): ||= also replaces an explicit stop position of 0 with the default
+ $s_max ||= length( ${ $s_seq } ) - 1;
+
+ $args->{ "long_matches" } ||= 50; # minimum length for a match to be treated as long
+ $args->{ "alph_len" } ||= 4; # not read in this routine
+
+ my ( $wordsize, @chain, $match, $best_match, @long_matches );
+
+ $matches = &select_matches( $matches, $q_min, $q_max, $s_min, $s_max ); # keep only matches inside the current search space
+
+ if ( scalar @{ $matches } == 0 ) # no matches - find some!
+ {
+ # $wordsize = &find_wordsize( $q_min, $q_max, $s_min, $s_max, $args );
+ $wordsize = 4; # fixed start value, decreased below until matches are found
+ $matches = &find_matches( $q_seq, $s_seq, $wordsize, $q_min, $q_max, $s_min, $s_max );
+
+ while ( scalar @{ $matches } == 0 and $wordsize > 1 )
+ {
+ $wordsize--;
+ $matches = &find_matches( $q_seq, $s_seq, $wordsize, $q_min, $q_max, $s_min, $s_max );
+ }
+
+ if ( scalar @{ $matches } > 0 ) {
+ push @chain, &align_two_seq( $q_seq, $s_seq, $matches, $q_min, $q_max, $s_min, $s_max, $args ); # same search space, now with matches
+ }
+ }
+ elsif ( @long_matches = grep { $_->[ LEN ] >= $args->{ "long_matches" } } @{ $matches } ) # long matches found - include all that don't overlap!
+ {
+ @long_matches = &order_matches( \@long_matches );
+
+ foreach $match ( @long_matches )
+ {
+ push @chain, $match;
+
+ if ( $match->[ Q_BEG ] - $q_min >= 2 and $match->[ S_BEG ] - $s_min >= 2 ) {
+ push @chain, &align_two_seq( $q_seq, $s_seq, $matches, $q_min, $match->[ Q_BEG ] - 1, $s_min, $match->[ S_BEG ] - 1, $args ); # intermediate search space
+ }
+
+ $q_min = $match->[ Q_END ] + 1; # the next search space starts right after this match
+ $s_min = $match->[ S_END ] + 1;
+ }
+
+ if ( $q_min + 1 < $q_max and $s_min + 1 < $s_max ) {
+ push @chain, &align_two_seq( $q_seq, $s_seq, $matches, $q_min, $q_max, $s_min, $s_max, $args ); # remaining search space
+ }
+ }
+ else # shorter matches are included according to score
+ {
+ foreach $match ( @{ $matches } ) {
+ # $match->[ SCORE ] = &score_match( $match, $q_min, $q_max, $s_min, $s_max );
+ $match->[ SCORE ] = &score_match_niels( $match, $q_seq, $q_min, $q_max, $s_min, $s_max );
+ }
+
+ # @{ $matches } = grep { $_->[ SCORE ] > 0 } @{ $matches };
+ @{ $matches } = grep { $_->[ SCORE ] <= 0.25 } @{ $matches }; # matches scoring above 0.25 are dropped
+ # @{ $matches } = sort { $b->[ SCORE ] <=> $a->[ SCORE ] } @{ $matches };
+ @{ $matches } = sort { $a->[ SCORE ] <=> $b->[ SCORE ] } @{ $matches }; # ascending, so the lowest score comes first
+
+ $best_match = shift @{ $matches }; # undefined when no match passed the filter
+
+ if ( $best_match )
+ {
+ push @chain, $best_match;
+
+ if ( $best_match->[ Q_BEG ] - $q_min >= 2 and $best_match->[ S_BEG ] - $s_min >= 2 ) {
+ push @chain, &align_two_seq( $q_seq, $s_seq, $matches, $q_min, $best_match->[ Q_BEG ] - 1, $s_min, $best_match->[ S_BEG ] - 1, $args ); # left search space
+ }
+
+ if ( $q_max - $best_match->[ Q_END ] >= 2 and $s_max - $best_match->[ S_END ] >= 2 ) {
+ push @chain, &align_two_seq( $q_seq, $s_seq, $matches, $best_match->[ Q_END ] + 1, $q_max, $best_match->[ S_END ] + 1, $s_max, $args ); # right search space
+ }
+ }
+ }
+
+ return wantarray ? @chain : \@chain;
+}
+
+
+sub select_matches
+{
+    # Martin A. Hansen, August 2006.
+
+    # Filters a list of matches, keeping only those that lie entirely
+    # inside the search space bounded by q_min..q_max on the q sequence
+    # and s_min..s_max on the s sequence (all bounds inclusive).
+
+    my ( $matches,   # list of matches
+         $q_min,     # q sequence start position
+         $q_max,     # q sequence stop position
+         $s_min,     # s sequence start position
+         $s_max,     # s sequence stop position
+       ) = @_;
+
+    # Returns the surviving matches as a list in list context,
+    # otherwise as a reference to that list.
+
+    my ( @inside );
+
+    foreach my $candidate ( @{ $matches } )
+    {
+        next unless $candidate->[ Q_BEG ] >= $q_min;
+        next unless $candidate->[ S_BEG ] >= $s_min;
+        next unless $candidate->[ Q_END ] <= $q_max;
+        next unless $candidate->[ S_END ] <= $s_max;
+
+        push @inside, $candidate;
+    }
+
+    return wantarray ? @inside : \@inside;
+}
+
+
+sub order_matches
+{
+    # Martin A. Hansen, October 2006
+
+    # Given a list of long matches, builds a chain of matches ordered by
+    # position. The chain is seeded with the longest match; the remaining
+    # matches are tried from longest to shortest and a match is included
+    # only if it lies strictly before the first match of the chain,
+    # strictly after the last one, or strictly between two neighbouring
+    # matches - on both the q and the s sequence. Matches that cross or
+    # overlap a match already in the chain are left out.
+
+    my ( $long_matches,   # list of matches
+       ) = @_;
+
+    # Returns a list of matches in list context, otherwise a reference
+    # to that list. An empty or undefined input yields an empty list.
+
+    my ( @sorted, @matches, $match, $i );
+
+    # Sort a copy so the caller's list is neither reordered nor shortened.
+
+    @sorted = sort { $b->[ LEN ] <=> $a->[ LEN ] } @{ $long_matches || [] };
+
+    # Without a seed there is nothing to chain - and shifting an empty
+    # list would put an undefined element into the result.
+
+    return wantarray ? () : [] if not @sorted;
+
+    @matches = shift @sorted;
+
+    foreach $match ( @sorted )
+    {
+        if ( $match->[ Q_END ] < $matches[ 0 ]->[ Q_BEG ] and $match->[ S_END ] < $matches[ 0 ]->[ S_BEG ] )
+        {
+            # entirely before the current head of the chain
+
+            unshift @matches, $match;
+        }
+        elsif ( $match->[ Q_BEG ] > $matches[ -1 ]->[ Q_END ] and $match->[ S_BEG ] > $matches[ -1 ]->[ S_END ] )
+        {
+            # entirely after the current tail of the chain
+
+            push @matches, $match;
+        }
+        else
+        {
+            # look for a pair of neighbours the match fits strictly between
+
+            for ( $i = 1; $i < @matches; $i++ )
+            {
+                if ( $matches[ $i - 1 ]->[ Q_END ] < $match->[ Q_BEG ] and $match->[ Q_END ] < $matches[ $i ]->[ Q_BEG ] and
+                     $matches[ $i - 1 ]->[ S_END ] < $match->[ S_BEG ] and $match->[ S_END ] < $matches[ $i ]->[ S_BEG ]
+                   )
+                {
+                    # NOTE(review): only this branch inserts a deep copy (dclone)
+                    # instead of the original reference - confirm this is intended.
+
+                    splice @matches, $i, 0, dclone $match;
+                    last;
+                }
+            }
+        }
+    }
+
+    return wantarray ? @matches : \@matches;
+}
+
+
+sub find_wordsize
+{
+    # Martin A. Hansen, August 2006.
+
+    # Given a search space, picks a word size such that a word match
+    # occurs only a few times. For the two largest size classes the word
+    # size is derived directly from the "long_matches" argument; for
+    # smaller search spaces it is grown until alph_len ** wordsize
+    # exceeds a size limit, or until it reaches the shorter dimension.
+    # The thresholds are optimized for DNA.
+
+    my ( $q_min,   # q sequence start position
+         $q_max,   # q sequence stop position
+         $s_min,   # s sequence start position
+         $s_max,   # s sequence stop position
+         $args,    # argument hash
+       ) = @_;
+
+    # Returns integer
+
+    my ( $q_len, $s_len, $shortest, $limit, $size );
+
+    $q_len = $q_max - $q_min + 1;
+    $s_len = $s_max - $s_min + 1;
+
+    $shortest = &Maasha::Calc::min( $q_len, $s_len );
+
+    return $args->{ "long_matches" }             if $shortest > 2000000;
+    return int( $args->{ "long_matches" } / 2 )  if $shortest > 100000;
+
+    # Larger spaces are limited by the area of the search space,
+    # small ones by the shorter dimension only.
+
+    $limit = ( $q_len > 100 or $s_len > 100 ) ? $q_len * $s_len : $shortest;
+
+    $size = 1;
+
+    $size++ while $args->{ "alph_len" } ** $size <= $limit and $size < $shortest;
+
+    return $size;
+}
+
+
+sub find_matches
+{
+    # Martin A. Hansen, November 2006
+
+    # Given two sequences and a search space, finds all maximally
+    # expanded matches between them. The words of one sequence are
+    # indexed by position in a hash (the q sequence when the s sequence
+    # is the longer of the two, otherwise the s sequence), and the other
+    # sequence is then scanned word by word against that index. Words
+    # containing "n" are never indexed (DNA/genome optimization). Each
+    # word hit that is not already contained in a recorded match is
+    # expanded with expand_match and recorded with its length.
+
+    my ( $q_seq,      # sequence 1
+         $s_seq,      # sequence 2
+         $wordsize,   # word size
+         $q_min,      # q sequence start position
+         $q_max,      # q sequence stop position
+         $s_min,      # s sequence start position
+         $s_max,      # s sequence stop position
+       ) = @_;
+
+    # Returns list of matches
+
+    my ( %index, @matches, $record, $word );
+
+    # Records the word hit at the given q and s start positions, unless
+    # it is redundant, i.e. contained in an already recorded match.
+
+    $record = sub
+    {
+        my ( $q_beg, $s_beg ) = @_;
+
+        my ( $seed );
+
+        $seed = [ $q_beg, $q_beg + $wordsize - 1, $s_beg, $s_beg + $wordsize - 1 ];
+
+        return if grep { $seed->[ Q_BEG ] >= $_->[ Q_BEG ] and
+                         $seed->[ Q_END ] <= $_->[ Q_END ] and
+                         $seed->[ S_BEG ] >= $_->[ S_BEG ] and
+                         $seed->[ S_END ] <= $_->[ S_END ] } @matches;
+
+        $seed = &expand_match( $q_seq, $s_seq, $seed, $q_max, $q_min, $s_max, $s_min );
+        $seed->[ LEN ] = $seed->[ Q_END ] - $seed->[ Q_BEG ] + 1;
+
+        push @matches, $seed;
+    };
+
+    if ( length( ${ $s_seq } ) > length( ${ $q_seq } ) )
+    {
+        # index the q words, then scan the s sequence
+
+        foreach my $q_pos ( $q_min .. $q_max - $wordsize + 1 )
+        {
+            $word = lc substr ${ $q_seq }, $q_pos, $wordsize;
+
+            next if $word =~ /n/i;   # DNA/genome optimization
+
+            push @{ $index{ $word } }, $q_pos;
+        }
+
+        foreach my $s_pos ( $s_min .. $s_max - $wordsize + 1 )
+        {
+            $word = lc substr ${ $s_seq }, $s_pos, $wordsize;
+
+            next if not exists $index{ $word };
+
+            foreach my $q_pos ( @{ $index{ $word } } ) {
+                $record->( $q_pos, $s_pos );
+            }
+        }
+    }
+    else
+    {
+        # index the s words, then scan the q sequence
+
+        foreach my $s_pos ( $s_min .. $s_max - $wordsize + 1 )
+        {
+            $word = lc substr ${ $s_seq }, $s_pos, $wordsize;
+
+            next if $word =~ /n/i;   # DNA/genome optimization
+
+            push @{ $index{ $word } }, $s_pos;
+        }
+
+        foreach my $q_pos ( $q_min .. $q_max - $wordsize + 1 )
+        {
+            $word = lc substr ${ $q_seq }, $q_pos, $wordsize;
+
+            next if not exists $index{ $word };
+
+            foreach my $s_pos ( @{ $index{ $word } } ) {
+                $record->( $q_pos, $s_pos );
+            }
+        }
+    }
+
+    return wantarray ? @matches : \@matches;
+}
+
+
+sub expand_match
+{
+    # Martin A. Hansen, August 2006.
+
+    # Given two sequences and a match, grows the match in both
+    # directions for as long as the flanking residues are identical
+    # (ignoring case) and the match stays inside the search space.
+    # A match is defined like this: [ Q_BEG, Q_END, S_BEG, S_END ]
+
+    my ( $q_seq,   # sequence 1 ref
+         $s_seq,   # sequence 2 ref
+         $match,   # sequence match
+         $q_max,   # q sequence stop position
+         $q_min,   # q sequence start position
+         $s_max,   # s sequence stop position
+         $s_min,   # s sequence start position
+       ) = @_;
+
+    # Returns the given match, which is modified in place.
+
+    my ( $ahead, $behind );
+
+    # expanding forward - $ahead is the offset of the residue being tested
+
+    $ahead = 1;
+
+    while ( $match->[ Q_END ] + $ahead <= $q_max and
+            $match->[ S_END ] + $ahead <= $s_max and
+            lc( substr ${ $q_seq }, $match->[ Q_END ] + $ahead, 1 ) eq lc( substr ${ $s_seq }, $match->[ S_END ] + $ahead, 1 ) )
+    {
+        $ahead++;
+    }
+
+    $match->[ Q_END ] += $ahead - 1;
+    $match->[ S_END ] += $ahead - 1;
+
+    # expanding backwards - same scheme in the other direction
+
+    $behind = 1;
+
+    while ( $match->[ Q_BEG ] - $behind >= $q_min and
+            $match->[ S_BEG ] - $behind >= $s_min and
+            lc( substr ${ $q_seq }, $match->[ Q_BEG ] - $behind, 1 ) eq lc( substr ${ $s_seq }, $match->[ S_BEG ] - $behind, 1 ) )
+    {
+        $behind++;
+    }
+
+    $match->[ Q_BEG ] -= $behind - 1;
+    $match->[ S_BEG ] -= $behind - 1;
+
+    return $match;
+}
+
+
+sub score_match
+{
+    # Martin A. Hansen, August 2006
+
+    # Given a match and a search space, scores the match from three terms:
+
+    #   1) length of match cubed - the longer the better.
+    #   2) twice the squared distance from the match start to the nearer
+    #      of the two diagonals, one anchored at the start corner and one
+    #      at the end corner of the search space, each spanning the
+    #      shorter dimension - the shorter the better.
+    #   3) the larger of the distances from the match start to either end
+    #      of the longer dimension (zero for a square search space) - the
+    #      shorter the better.
+
+    # The total score is calculated: total = 1) - 3) - 2)
+
+    # The higher the score, the better the match.
+
+    my ( $match,   # match
+         $q_min,   # q sequence start position
+         $q_max,   # q sequence stop position
+         $s_min,   # s sequence start position
+         $s_max,   # s sequence stop position
+       ) = @_;
+
+    # Returns a number, which is negative when the penalties outweigh
+    # the length term.
+
+    my ( $q_dim, $s_dim, $side, $beg_diag_dist, $end_diag_dist, $nearest_diag, $pos, $lo, $hi, $score_narrow );
+
+    $q_dim = $q_max - $q_min + 1;
+    $s_dim = $s_max - $s_min + 1;
+
+    # ----- distance away from the diagonals
+
+    $side = ( $q_dim >= $s_dim ) ? $s_dim : $q_dim;   # the narrow end
+
+    $beg_diag_dist = &Maasha::Calc::dist_point2line( $match->[ Q_BEG ], $match->[ S_BEG ], $q_min, $s_min, $q_min + $side, $s_min + $side );
+    $end_diag_dist = &Maasha::Calc::dist_point2line( $match->[ Q_BEG ], $match->[ S_BEG ], $q_max - $side, $s_max - $side, $q_max, $s_max );
+
+    $nearest_diag = &Maasha::Calc::min( $beg_diag_dist, $end_diag_dist );
+
+    # ----- distance to the ends of the longer dimension
+
+    if ( $q_dim == $s_dim )
+    {
+        $score_narrow = 0;
+    }
+    else
+    {
+        ( $pos, $lo, $hi ) = $q_dim > $s_dim ? ( $match->[ Q_BEG ], $q_min, $q_max )
+                                             : ( $match->[ S_BEG ], $s_min, $s_max );
+
+        $score_narrow = &Maasha::Calc::max( $pos - $lo, $hi - $pos );
+    }
+
+    return $match->[ LEN ] ** 3 - $score_narrow - 2 * $nearest_diag ** 2;
+}
+
+
+sub score_match_niels
+{
+    # Niels Larsen, June 2004.
+
+    # Creates a crude "heuristic" attempt of telling how likely it is
+    # that a given match occurs by chance in a given search space. For
+    # matches longer than 20 the residue composition of the matched
+    # stretch is taken into account if a sequence is given. The score is
+    # then punished by the distance to the closest corner of the search
+    # space and by how far the match is skewed towards the narrow end.
+    # Scores range from zero and up, and lower is better. Scoring stops
+    # early as soon as the score exceeds 0.25.
+
+    my ( $match,   # Match array
+         $q_seq,   # Ref to the sequence that the match's q coordinates index into
+         $q_min,   # Lower bound search area (query sequence)
+         $q_max,   # Upper bound search area (query sequence)
+         $s_min,   # Lower bound search area (subject sequence)
+         $s_max,   # Upper bound search area (subject sequence)
+       ) = @_;
+
+    # Returns a non-negative number.
+
+    my ( $q_beg, $s_beg, $q_end, $s_end, $seq, $seqlen, %chars, $pmatch, $score,
+         $q_delta_beg, $s_delta_beg, $q_delta_end, $s_delta_end,
+         $delta_beg_max, $delta_end_max );
+
+    $q_beg = $match->[ Q_BEG ];
+    $q_end = $match->[ Q_END ];
+    $s_beg = $match->[ S_BEG ];
+    $s_end = $match->[ S_END ];
+
+    # >>>>>>>>>>>>>>>>>>>>>>> CRUDE INITIAL SCORE <<<<<<<<<<<<<<<<<<<<<<
+
+    # Get the probability of a match from the sequence composition (when
+    # match is longer than 20 and sequence is given, otherwise use 0.25)
+    # and raise that to the power of the length.
+
+    if ( $match->[ LEN ] > 20 and defined $q_seq )
+    {
+        # Matches are found case-insensitively, so the composition must be
+        # counted case-insensitively too. Otherwise an upper case stretch
+        # yields a probability of zero and thereby a perfect score.
+
+        $seq    = lc( substr( ${ $q_seq }, $q_beg, $q_end - $q_beg + 1 ) );
+        $seqlen = length $seq;
+
+        $chars{ "a" } = ( $seq =~ tr/a// );
+        $chars{ "c" } = ( $seq =~ tr/c// );
+        $chars{ "g" } = ( $seq =~ tr/g// );
+        $chars{ "t" } = ( $seq =~ tr/t// );
+
+        $pmatch = ( $chars{ "a" } / $seqlen ) ** 2 + ( $chars{ "c" } / $seqlen ) ** 2
+                + ( $chars{ "g" } / $seqlen ) ** 2 + ( $chars{ "t" } / $seqlen ) ** 2;
+    }
+    else
+    {
+        $pmatch = 0.25;
+    }
+
+    $score = $pmatch ** ( $q_end - $q_beg + 1 );
+
+    return $score if $score > 0.25;
+
+    # Punish by how far the match is to the closest corner of the search
+    # space,
+
+    $q_delta_beg = $q_beg - $q_min;
+    $s_delta_beg = $s_beg - $s_min;
+
+    $q_delta_end = $q_max - $q_end;
+    $s_delta_end = $s_max - $s_end;
+
+    $delta_beg_max = ( $q_delta_beg > $s_delta_beg ) ? $q_delta_beg : $s_delta_beg;
+    $delta_end_max = ( $q_delta_end > $s_delta_end ) ? $q_delta_end : $s_delta_end;
+
+    if ( $delta_beg_max <= $delta_end_max ) {
+        $score *= ( $delta_beg_max + 1 ) ** 2.0;
+    } else {
+        $score *= ( $delta_end_max + 1 ) ** 2.0;
+    }
+
+    return $score if $score > 0.25;
+
+    # Add penalty if the match is towards the narrow end of the
+    # search space,
+
+    if ( ( $q_max - $q_min ) <= ( $s_max - $s_min ) )
+    {
+        if ( $q_delta_beg > $s_delta_beg )
+        {
+            $score *= 2 * ( $q_delta_beg - $s_delta_beg ) ** 3;
+        }
+        elsif ( $q_delta_end > $s_delta_end )
+        {
+            $score *= 2 * ( $q_delta_end - $s_delta_end ) ** 3;
+        }
+    }
+    else
+    {
+        if ( $s_delta_beg > $q_delta_beg )
+        {
+            $score *= 2 * ( $s_delta_beg - $q_delta_beg ) ** 3;
+        }
+        elsif ( $s_delta_end > $q_delta_end )
+        {
+            $score *= 2 * ( $s_delta_end - $q_delta_end ) ** 3;
+        }
+    }
+
+    # Sanity check - every factor applied above is positive.
+
+    if ( $score < 0 ) {
+        print STDERR "q_min, q_max, s_min, s_max: $q_min, $q_max, $s_min, $s_max\n";
+        die qq (Score <= 0 -> $score);
+    }
+
+    return $score;
+}
+
+
+sub print_alignment
+{
+    # Martin A. Hansen, August 2006.
+
+    # Prints an alignment of two sequences in FASTA format to STDOUT,
+    # based on a list of matches. Before each match the shorter of the
+    # two unmatched stretches is padded on the left with "-" so both
+    # aligned sequences stay in register; after the last match the
+    # shorter tail is padded on the right. The list of matches is sorted
+    # by q start position in place.
+
+    my ( $matches,   # list of matches
+         $q_head,    # query sequence head
+         $q_seq,     # query sequence ref
+         $s_head,    # subject sequence head
+         $s_seq,     # subject sequence ref
+       ) = @_;
+
+    my ( $q_pos, $s_pos, $q_gap, $s_gap, $shift, $last, $q_aseq, $s_aseq );
+
+    @{ $matches } = sort { $a->[ Q_BEG ] <=> $b->[ Q_BEG ] } @{ $matches };
+
+    $q_pos = 0;
+    $s_pos = 0;
+
+    foreach my $match ( @{ $matches } )
+    {
+        $q_gap = $match->[ Q_BEG ] - $q_pos;
+        $s_gap = $match->[ S_BEG ] - $s_pos;
+        $shift = $q_gap - $s_gap;
+
+        # indels go in front of the sequence with the shorter unmatched stretch
+
+        $s_aseq .= "-" x $shift      if $shift > 0;
+        $q_aseq .= "-" x ( -$shift ) if $shift < 0;
+
+        # unmatched stretch followed by the match itself
+
+        $q_aseq .= substr ${ $q_seq }, $q_pos, $q_gap + $match->[ LEN ];
+        $s_aseq .= substr ${ $s_seq }, $s_pos, $s_gap + $match->[ LEN ];
+
+        $q_pos += $q_gap + $match->[ LEN ];
+        $s_pos += $s_gap + $match->[ LEN ];
+    }
+
+    # whatever follows the last match - or everything if there were no matches
+
+    $last = $matches->[ -1 ] || [ 0, 0, 0, 0, 0 ];
+
+    $q_gap = length( ${ $q_seq } ) - $last->[ Q_END ];
+    $s_gap = length( ${ $s_seq } ) - $last->[ S_END ];
+    $shift = $q_gap - $s_gap;
+
+    $q_aseq .= substr ${ $q_seq }, $q_pos, $q_gap + $last->[ LEN ];
+    $s_aseq .= substr ${ $s_seq }, $s_pos, $s_gap + $last->[ LEN ];
+
+    $s_aseq .= "-" x $shift      if $shift > 0;
+    $q_aseq .= "-" x ( -$shift ) if $shift < 0;
+
+    print ">$q_head\n$q_aseq\n>$s_head\n$s_aseq\n";
+}
+
+
+sub print_matches
+{
+ # Martin A. Hansen, February 2007.
+
+ # Builds a pairwise alignment of two sequences from a list of matches
+ # and hands it to align_print_multi for printing, together with a
+ # ruler for each sequence and a "consensus" line of pins. In the pins
+ # "|" marks a column inside a match, ":" marks identical residues
+ # (compared case sensitively) outside a match, and " " marks anything
+ # else. The list of matches is sorted by q start position in place.
+
+ my ( $matches, # list of matches
+ $q_head, # query sequence head
+ $q_seq, # query sequence ref
+ $s_head, # subject sequence head
+ $s_seq, # subject sequence ref
+ $args, # argument hash - OPTIONAL
+ ) = @_;
+
+ # line width passed on to align_print_multi - defaults to 80
+
+ $args->{ "wrap" } ||= 80;
+
+ my ( $q_pos, $s_pos, $match, $q_nomatch, $q_match, $s_nomatch, $q_aseq, $s_aseq, $pins, $i, $q, $s, $q_ruler, $s_ruler, $entries );
+
+ @{ $matches } = sort { $a->[ Q_BEG ] <=> $b->[ Q_BEG ] } @{ $matches };
+
+ # $q_pos and $s_pos point at the first residue not yet emitted
+
+ $q_pos = 0;
+ $s_pos = 0;
+
+ for ( $i = 0; $i < @{ $matches }; $i++ )
+ {
+ $match = $matches->[ $i ];
+
+ # length of the unmatched stretch in front of this match
+
+ $q_nomatch = $match->[ Q_BEG ] - $q_pos;
+ $s_nomatch = $match->[ S_BEG ] - $s_pos;
+
+ $q = $q_pos;
+ $s = $s_pos;
+
+ # the surplus of the longer unmatched stretch is emitted first,
+ # opposite indels in the other sequence
+
+ if ( $q_nomatch - $s_nomatch > 0 )
+ {
+ $q_aseq .= substr ${ $q_seq }, $q_pos, ( $q_nomatch - $s_nomatch );
+ $s_aseq .= "-" x ( $q_nomatch - $s_nomatch );
+ $pins .= " " x ( $q_nomatch - $s_nomatch );
+ $q += ( $q_nomatch - $s_nomatch );
+ }
+ elsif ( $s_nomatch - $q_nomatch > 0 )
+ {
+ $s_aseq .= substr ${ $s_seq }, $s_pos, ( $s_nomatch - $q_nomatch );
+ $q_aseq .= "-" x ( $s_nomatch - $q_nomatch );
+ $pins .= " " x ( $s_nomatch - $q_nomatch );
+ $s += ( $s_nomatch - $q_nomatch );
+ }
+
+ # the rest of the unmatched stretch is paired up column by column
+
+ while ( $q < $q_pos + $q_nomatch and $s < $s_pos + $s_nomatch )
+ {
+ $q_aseq .= substr ${ $q_seq }, $q, 1;
+ $s_aseq .= substr ${ $s_seq }, $s, 1;
+
+ if ( substr( ${ $q_seq }, $q, 1 ) eq substr( ${ $s_seq }, $s, 1 ) )
+ {
+ $pins .= ":";
+ } else {
+ $pins .= " ";
+ }
+
+ $q++;
+ $s++;
+ }
+
+ # the match itself
+
+ $q_aseq .= substr ${ $q_seq }, $match->[ Q_BEG ], $match->[ LEN ];
+ $s_aseq .= substr ${ $s_seq }, $match->[ S_BEG ], $match->[ LEN ];
+ $pins .= "|" x $match->[ LEN ];
+
+ $q_pos += $q_nomatch + $match->[ LEN ];
+ $s_pos += $s_nomatch + $match->[ LEN ];
+ }
+
+ # what follows the last match - $match still holds the last match of
+ # the loop above, or is undefined if there were no matches at all
+
+ $q_nomatch = length( ${ $q_seq } ) - ( $match->[ Q_END ] || 0 );
+ $s_nomatch = length( ${ $s_seq } ) - ( $match->[ S_END ] || 0 );
+
+ $q = $q_pos;
+ $s = $s_pos;
+
+ # pair up trailing residues column by column - the loop is additionally
+ # bounded by the sequence lengths, and advances $q_pos and $s_pos too
+
+ while ( $q < $q_pos + $q_nomatch and $q < length ${ $q_seq } and $s < $s_pos + $s_nomatch and $s < length ${ $s_seq } )
+ {
+ $q_aseq .= substr ${ $q_seq }, $q, 1;
+ $s_aseq .= substr ${ $s_seq }, $s, 1;
+
+ if ( substr( ${ $q_seq }, $q, 1 ) eq substr( ${ $s_seq }, $s, 1 ) ) {
+ $pins .= ":";
+ } else {
+ $pins .= " ";
+ }
+
+ $q++;
+ $s++;
+ $q_pos++;
+ $s_pos++;
+ }
+
+ # the surplus of the longer tail is emitted last, opposite indels
+
+ if ( $q_nomatch - $s_nomatch > 0 )
+ {
+ $q_aseq .= substr ${ $q_seq }, $q_pos, ( $q_nomatch - $s_nomatch );
+ $s_aseq .= "-" x ( $q_nomatch - $s_nomatch );
+ $pins .= " " x ( $q_nomatch - $s_nomatch );
+ }
+ elsif ( $s_nomatch - $q_nomatch > 0 )
+ {
+ $s_aseq .= substr ${ $s_seq }, $s_pos, ( $s_nomatch - $q_nomatch );
+ $q_aseq .= "-" x ( $s_nomatch - $q_nomatch );
+ $pins .= " " x ( $s_nomatch - $q_nomatch );
+ }
+
+ $q_ruler = &make_ruler( $q_aseq );
+ $s_ruler = &make_ruler( $s_aseq );
+
+ # rows in the order they are handed to align_print_multi
+
+ $entries = [
+ [ "ruler", $q_ruler ],
+ [ $q_head, $q_aseq ],
+ [ "consensus", $pins ],
+ [ $s_head, $s_aseq ],
+ [ "ruler", $s_ruler ],
+ ];
+
+ # last statement - its value is what this routine returns
+
+ &align_print_multi( $entries, undef, $args->{ "wrap" } )
+}
+
+
+sub make_ruler
+{
+    # Martin A. Hansen, February 2007;
+
+    # Generates a ruler for a given sequence (with indels). The ruler has
+    # one character per column of the sequence. Residues are counted
+    # with indels ("-") left out, and the ruler shows "." at every 10th
+    # residue, ":" at every 50th and "|" followed by the residue number
+    # at every 100th. The number is left out if it does not fit within
+    # the ruler. Indel columns are always blank.
+
+    my ( $seq
+       ) = @_;
+
+    # Returns string, which is empty for an empty sequence.
+
+    my ( $i, $char, $skip, $count, $gap, $tics, $len );
+
+    $tics = "";
+    $gap  = 0;
+    $skip = 0;
+    $len  = length $seq;
+
+    for ( $i = 1; $i <= $len; $i++ )
+    {
+        $char = substr $seq, $i - 1, 1;
+
+        $gap++ if $char eq "-";
+
+        # columns covered by the digits of a number get no tic of their own
+
+        if ( $skip )
+        {
+            $skip--;
+
+            next;
+        }
+
+        $count = ( $char eq "-" ) ? 1 : $i - $gap;
+
+        if ( $count % 100 == 0 )
+        {
+            # Whether the number fits depends on the column ($i) and not
+            # on the residue count - with indels in the sequence the two
+            # differ, and testing the count lets the ruler grow longer
+            # than the sequence.
+
+            if ( $i + length( $count ) >= $len )
+            {
+                $tics .= "|";
+            }
+            else
+            {
+                $tics .= "|" . $count;
+                $skip = length $count;
+            }
+        }
+        elsif ( $count % 50 == 0 ) {
+            $tics .= ":";
+        } elsif ( $count % 10 == 0 ) {
+            $tics .= ".";
+        } else {
+            $tics .= " ";
+        }
+    }
+
+    return $tics;
+}
+
+
+sub align_sim_local
+{
+    # Martin A. Hansen, May 2007.
+
+    # Calculates the local similarity of an alignment based on an
+    # alignment chain: the number of matching residues divided by the
+    # span of the chain, where the span is the longer of the q and s
+    # extents covered from the first match start to the last match end.
+    # A short but "good" alignment thus yields a high similarity, while
+    # a long "poor" alignment yields a low similarity.
+
+    my ( $chain,   # list of matches in alignment
+       ) = @_;
+
+    # Returns the similarity in percent with two decimals,
+    # or 0 for an empty chain.
+
+    my ( $residues, $q_lo, $q_hi, $s_lo, $s_hi, $span );
+
+    return 0 if not @{ $chain };
+
+    $residues = 0;
+
+    ( $q_lo, $s_lo ) = ( 999999999, 999999999 );
+    ( $q_hi, $s_hi ) = ( 0, 0 );
+
+    foreach my $hit ( @{ $chain } )
+    {
+        $residues += $hit->[ LEN ];
+
+        $q_lo = $hit->[ Q_BEG ] if $hit->[ Q_BEG ] < $q_lo;
+        $s_lo = $hit->[ S_BEG ] if $hit->[ S_BEG ] < $s_lo;
+
+        $q_hi = $hit->[ Q_END ] if $hit->[ Q_END ] > $q_hi;
+        $s_hi = $hit->[ S_END ] if $hit->[ S_END ] > $s_hi;
+    }
+
+    $span = &Maasha::Calc::max( $q_hi - $q_lo + 1, $s_hi - $s_lo + 1 );
+
+    return sprintf( "%.2f", ( $residues / $span ) * 100 );
+}
+
+
+sub align_sim_global
+{
+    # Martin A. Hansen, June 2007.
+
+    # Calculates the global similarity of an alignment based on an
+    # alignment chain: the number of matching residues divided by the
+    # length of the shortest of the two sequences.
+
+    my ( $chain,   # list of matches in alignment
+         $q_seq,   # ref to query sequence
+         $s_seq,   # ref to subject sequence
+       ) = @_;
+
+    # Returns the similarity in percent with two decimals,
+    # or 0 for an empty chain.
+
+    my ( $residues, $shortest );
+
+    return 0 if not @{ $chain };
+
+    $residues = 0;
+
+    foreach my $hit ( @{ $chain } ) {
+        $residues += $hit->[ LEN ];
+    }
+
+    $shortest = &Maasha::Calc::min( length( ${ $q_seq } ), length( ${ $s_seq } ) );
+
+    return sprintf( "%.2f", ( $residues / $shortest ) * 100 );
+}
+
+
+sub align_consensus
+{
+    # Martin A. Hansen, June 2006.
+
+    # Given an alignment as a list of FASTA entries, generates a
+    # consensus sequence from the per column data calculated by
+    # Maasha::Seq::seqlogo_calc. For each column the last
+    # ( residue, score ) pair is used: the residue is included if its
+    # score is at least min_sim percent of the maximum number of bits
+    # (4 for protein, otherwise 2), else the column becomes "-".
+
+    my ( $entries,   # list of aligned FASTA entries
+         $type,      # residue type - OPTIONAL, guessed from first entry
+         $min_sim,   # minimum similarity - OPTIONAL, defaults to 50
+       ) = @_;
+
+    # Returns the consensus as a FASTA entry - as a tuple in list
+    # context, otherwise as a reference to it.
+
+    my ( $bit_max, $entry );
+
+    $type    ||= &Maasha::Seq::seq_guess_type( $entries->[ 0 ] );
+    $min_sim ||= 50;
+
+    $bit_max = ( $type =~ /protein/ ) ? 4 : 2;
+
+    foreach my $column ( @{ &Maasha::Seq::seqlogo_calc( $bit_max, $entries ) } )
+    {
+        my ( $char, $score ) = @{ $column->[ -1 ] };
+
+        $entry->[ SEQ ] .= ( ( $score / $bit_max ) * 100 >= $min_sim ) ? $char : "-";
+    }
+
+    $entry->[ HEAD ] = "Consensus: $min_sim%";
+
+    return $entry if not wantarray;
+
+    return @{ $entry };
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::Berkeley_DB;
+
+
+# Copyright (C) 2007-2008 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for Berkeley DB manipulation.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Maasha::Common;
+use DB_File;
+
+use vars qw( @ISA @EXPORT_OK );
+
+require Exporter;
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub db_init
+{
+    # Martin A. Hansen, May 2008.
+
+    # Ties a Perl hash to the Berkeley DB file at the given path using
+    # DB_File. If the tie fails the error is reported through
+    # Maasha::Common::error.
+
+    my ( $path,   # path to BDB file.
+       ) = @_;
+
+    # Returns a reference to the tied hash in scalar context. In list
+    # context the key/value pairs of the hash are returned as a flat list.
+
+    my ( %db );
+
+    if ( not tie( %db, "DB_File", $path ) ) {
+        &Maasha::Common::error( "Could not tie-open DB file '$path': $!" );
+    }
+
+    return \%db if not wantarray;
+
+    return %db;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+1;
--- /dev/null
+package Maasha::Biotools;
+
+
+# Copyright (C) 2007-2008 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for manipulation, parsing and emitting of human/machine readable biotool records.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Getopt::Long qw( :config bundling );
+use Time::HiRes qw( gettimeofday );
+use Storable qw( dclone );
+use Maasha::Config;
+use Maasha::Common;
+use Maasha::Fasta;
+use Maasha::Align;
+use Maasha::Matrix;
+use Maasha::Match;
+use Maasha::EMBL;
+use Maasha::Stockholm;
+use Maasha::Seq;
+use Maasha::Patscan;
+use Maasha::Plot;
+use Maasha::Calc;
+use Maasha::UCSC;
+use Maasha::NCBI;
+use Maasha::GFF;
+use Maasha::TwoBit;
+use Maasha::Solid;
+use Maasha::SQL;
+
+use vars qw( @ISA @EXPORT_OK );
+
+require Exporter;
+
+@ISA = qw( Exporter );
+
+@EXPORT_OK = qw(
+ read_stream
+ write_stream
+ get_record
+ put_record
+);
+
+use constant {
+ SEQ_NAME => 0,
+ SEQ => 1,
+};
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SIGNAL HANDLER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# All statements below are at package top level, so they run when this
+# module is loaded.
+
+# Route die() as well as the INT and TERM signals to sig_handler
+# (defined outside this part of the file).
+
+$SIG{ '__DIE__' } = \&sig_handler;
+$SIG{ 'INT' } = \&sig_handler;
+$SIG{ 'TERM' } = \&sig_handler;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> GLOBALS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Name of the running script and the temporary directory, both
+# obtained from Maasha::Common.
+
+my ( $script, $TMP_DIR );
+
+$script = &Maasha::Common::get_scriptname();
+$TMP_DIR = &Maasha::Common::get_tmpdir();
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> LOG <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Record this invocation (script name and command line arguments) in
+# biopieces.log in the directory given by the LOG_DIR environment
+# variable. The log file is closed again right away.
+
+my $log_fh = &Maasha::Common::append_open( $ENV{ "LOG_DIR" } . "/biopieces.log" );
+
+$log_fh->autoflush( 1 );
+
+&log( $log_fh, $script, \@ARGV );
+
+close $log_fh;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RUN SCRIPT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Run the script and report the elapsed time on STDERR. The script name
+# is padded with spaces to 25 columns; longer names get no padding.
+
+my $t0 = gettimeofday();
+
+&run_script( $script );
+
+my $t1 = gettimeofday();
+
+print STDERR "Program: $script" . ( " " x ( 25 - length( $script ) ) ) . sprintf( "Run time: %.4f\n", ( $t1 - $t0 ) );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub log
+{
+    # Martin A. Hansen, January 2008.
+
+    # Writes one line to the logfile consisting of a time stamp, the
+    # user name (taken from the USER environment variable) and the
+    # script name followed by its command line arguments. The first
+    # three are tab separated, the arguments space separated. A script
+    # name of "-e" is logged as "biopieces".
+
+    my ( $fh,       # filehandle to logfile
+         $script,   # script name
+         $argv,     # reference to @ARGV
+       ) = @_;
+
+    # Returns nothing of interest - the value is that of the final print.
+
+    my ( $stamp, $who, $name );
+
+    $stamp = &Maasha::Common::time_stamp();
+    $who   = $ENV{ "USER" };
+    $name  = ( $script eq "-e" ) ? "biopieces" : $script;
+
+    print $fh "$stamp\t$who\t$name ", join( " ", @{ $argv } ), "\n";
+}
+
+
+sub run_script
+{
+ # Martin A. Hansen, August 2007.
+
+ # Run a specific script.
+
+ my ( $script, # script name
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $options, $in, $out );
+
+ &script_list_biotools( $ENV{ 'INST_DIR'} . "/biotools/usage/" ) if $script eq "list_biotools";
+
+ &script_print_usage( $ENV{ 'INST_DIR'} . "/biotools/usage/$script" ) if -t STDIN and not @ARGV;
+
+ $options = &get_options( $script );
+
+ $in = &read_stream( $options->{ "stream_in" } );
+ $out = &write_stream( $options->{ "stream_out" } );
+
+ if ( $script eq "read_fasta" ) { &script_read_fasta( $in, $out, $options ) }
+ elsif ( $script eq "read_align" ) { &script_read_align( $in, $out, $options ) }
+ elsif ( $script eq "read_tab" ) { &script_read_tab( $in, $out, $options ) }
+ elsif ( $script eq "read_psl" ) { &script_read_psl( $in, $out, $options ) }
+ elsif ( $script eq "read_bed" ) { &script_read_bed( $in, $out, $options ) }
+ elsif ( $script eq "read_blast_tab" ) { &script_read_blast_tab( $in, $out, $options ) }
+ elsif ( $script eq "read_embl" ) { &script_read_embl( $in, $out, $options ) }
+ elsif ( $script eq "read_stockholm" ) { &script_read_stockholm( $in, $out, $options ) }
+ elsif ( $script eq "read_phastcons" ) { &script_read_phastcons( $in, $out, $options ) }
+ elsif ( $script eq "read_soft" ) { &script_read_soft( $in, $out, $options ) }
+ elsif ( $script eq "read_gff" ) { &script_read_gff( $in, $out, $options ) }
+ elsif ( $script eq "read_2bit" ) { &script_read_2bit( $in, $out, $options ) }
+ elsif ( $script eq "read_solexa" ) { &script_read_solexa( $in, $out, $options ) }
+ elsif ( $script eq "read_solid" ) { &script_read_solid( $in, $out, $options ) }
+ elsif ( $script eq "read_mysql" ) { &script_read_mysql( $in, $out, $options ) }
+ elsif ( $script eq "count_seq" ) { &script_count_seq( $in, $out, $options ) }
+ elsif ( $script eq "length_seq" ) { &script_length_seq( $in, $out, $options ) }
+ elsif ( $script eq "uppercase_seq" ) { &script_uppercase_seq( $in, $out, $options ) }
+ elsif ( $script eq "shuffle_seq" ) { &script_shuffle_seq( $in, $out, $options ) }
+ elsif ( $script eq "analyze_seq" ) { &script_analyze_seq( $in, $out, $options ) }
+ elsif ( $script eq "analyze_tags" ) { &script_analyze_tags( $in, $out, $options ) }
+ elsif ( $script eq "complexity_seq" ) { &script_complexity_seq( $in, $out, $options ) }
+ elsif ( $script eq "oligo_freq" ) { &script_oligo_freq( $in, $out, $options ) }
+ elsif ( $script eq "create_weight_matrix" ) { &script_create_weight_matrix( $in, $out, $options ) }
+ elsif ( $script eq "calc_bit_scores" ) { &script_calc_bit_scores( $in, $out, $options ) }
+ elsif ( $script eq "reverse_seq" ) { &script_reverse_seq( $in, $out, $options ) }
+ elsif ( $script eq "complement_seq" ) { &script_complement_seq( $in, $out, $options ) }
+ elsif ( $script eq "remove_indels" ) { &script_remove_indels( $in, $out, $options ) }
+ elsif ( $script eq "transliterate_seq" ) { &script_transliterate_seq( $in, $out, $options ) }
+ elsif ( $script eq "transliterate_vals" ) { &script_transliterate_vals( $in, $out, $options ) }
+ elsif ( $script eq "translate_seq" ) { &script_translate_seq( $in, $out, $options ) }
+ elsif ( $script eq "extract_seq" ) { &script_extract_seq( $in, $out, $options ) }
+ elsif ( $script eq "get_genome_seq" ) { &script_get_genome_seq( $in, $out, $options ) }
+ elsif ( $script eq "get_genome_align" ) { &script_get_genome_align( $in, $out, $options ) }
+ elsif ( $script eq "get_genome_phastcons" ) { &script_get_genome_phastcons( $in, $out, $options ) }
+ elsif ( $script eq "fold_seq" ) { &script_fold_seq( $in, $out, $options ) }
+ elsif ( $script eq "split_seq" ) { &script_split_seq( $in, $out, $options ) }
+ elsif ( $script eq "split_bed" ) { &script_split_bed( $in, $out, $options ) }
+ elsif ( $script eq "align_seq" ) { &script_align_seq( $in, $out, $options ) }
+ elsif ( $script eq "tile_seq" ) { &script_tile_seq( $in, $out, $options ) }
+ elsif ( $script eq "invert_align" ) { &script_invert_align( $in, $out, $options ) }
+ elsif ( $script eq "patscan_seq" ) { &script_patscan_seq( $in, $out, $options ) }
+ elsif ( $script eq "create_blast_db" ) { &script_create_blast_db( $in, $out, $options ) }
+ elsif ( $script eq "blast_seq" ) { &script_blast_seq( $in, $out, $options ) }
+ elsif ( $script eq "blat_seq" ) { &script_blat_seq( $in, $out, $options ) }
+ elsif ( $script eq "match_seq" ) { &script_match_seq( $in, $out, $options ) }
+ elsif ( $script eq "create_vmatch_index" ) { &script_create_vmatch_index( $in, $out, $options ) }
+ elsif ( $script eq "vmatch_seq" ) { &script_vmatch_seq( $in, $out, $options ) }
+ elsif ( $script eq "write_fasta" ) { &script_write_fasta( $in, $out, $options, $options ) }
+ elsif ( $script eq "write_align" ) { &script_write_align( $in, $out, $options ) }
+ elsif ( $script eq "write_blast" ) { &script_write_blast( $in, $out, $options ) }
+ elsif ( $script eq "write_tab" ) { &script_write_tab( $in, $out, $options ) }
+ elsif ( $script eq "write_bed" ) { &script_write_bed( $in, $out, $options ) }
+ elsif ( $script eq "write_psl" ) { &script_write_psl( $in, $out, $options ) }
+ elsif ( $script eq "write_2bit" ) { &script_write_2bit( $in, $out, $options, $options ) }
+ elsif ( $script eq "write_solid" ) { &script_write_solid( $in, $out, $options, $options ) }
+ elsif ( $script eq "head_records" ) { &script_head_records( $in, $out, $options ) }
+ elsif ( $script eq "remove_keys" ) { &script_remove_keys( $in, $out, $options ) }
+ elsif ( $script eq "rename_keys" ) { &script_rename_keys( $in, $out, $options ) }
+ elsif ( $script eq "uniq_vals" ) { &script_uniq_vals( $in, $out, $options ) }
+ elsif ( $script eq "merge_vals" ) { &script_merge_vals( $in, $out, $options ) }
+ elsif ( $script eq "grab" ) { &script_grab( $in, $out, $options ) }
+ elsif ( $script eq "compute" ) { &script_compute( $in, $out, $options ) }
+ elsif ( $script eq "flip_tab" ) { &script_flip_tab( $in, $out, $options ) }
+ elsif ( $script eq "add_ident" ) { &script_add_ident( $in, $out, $options ) }
+ elsif ( $script eq "count_records" ) { &script_count_records( $in, $out, $options ) }
+ elsif ( $script eq "random_records" ) { &script_random_records( $in, $out, $options ) }
+ elsif ( $script eq "sort_records" ) { &script_sort_records( $in, $out, $options ) }
+ elsif ( $script eq "count_vals" ) { &script_count_vals( $in, $out, $options ) }
+ elsif ( $script eq "plot_histogram" ) { &script_plot_histogram( $in, $out, $options ) }
+ elsif ( $script eq "plot_lendist" ) { &script_plot_lendist( $in, $out, $options ) }
+ elsif ( $script eq "plot_chrdist" ) { &script_plot_chrdist( $in, $out, $options ) }
+ elsif ( $script eq "plot_karyogram" ) { &script_plot_karyogram( $in, $out, $options ) }
+ elsif ( $script eq "plot_matches" ) { &script_plot_matches( $in, $out, $options ) }
+ elsif ( $script eq "plot_seqlogo" ) { &script_plot_seqlogo( $in, $out, $options ) }
+ elsif ( $script eq "plot_phastcons_profiles" ) { &script_plot_phastcons_profiles( $in, $out, $options ) }
+ elsif ( $script eq "analyze_bed" ) { &script_analyze_bed( $in, $out, $options ) }
+ elsif ( $script eq "analyze_vals" ) { &script_analyze_vals( $in, $out, $options ) }
+ elsif ( $script eq "length_vals" ) { &script_length_vals( $in, $out, $options ) }
+ elsif ( $script eq "sum_vals" ) { &script_sum_vals( $in, $out, $options ) }
+ elsif ( $script eq "mean_vals" ) { &script_mean_vals( $in, $out, $options ) }
+ elsif ( $script eq "median_vals" ) { &script_median_vals( $in, $out, $options ) }
+ elsif ( $script eq "max_vals" ) { &script_max_vals( $in, $out, $options ) }
+ elsif ( $script eq "min_vals" ) { &script_min_vals( $in, $out, $options ) }
+ elsif ( $script eq "upload_to_ucsc" ) { &script_upload_to_ucsc( $in, $out, $options ) }
+
+ close $in if defined $in;
+ close $out;
+
+ # unset status - missing
+ # write log file - missing
+}
+
+
+sub get_options
+{
+    # Martin A. Hansen, February 2008.
+
+    # Gets options from commandline and checks these vigorously.
+    #
+    # The option specification for the given script is looked up in a table,
+    # the stream options shared by all scripts are appended, and the command
+    # line is parsed with GetOptions. Comma separated list options are split
+    # into array references before the values are validated. Any failed check
+    # is reported through Maasha::Common::error.
+
+    my ( $script,   # name of script
+       ) = @_;
+
+    # Returns hash (or a reference to the hash in scalar context).
+
+    my ( %spec_for, %options, @options, $opt, $col, @genomes );
+
+    # Getopt::Long specifications per script. Scripts without an entry only
+    # accept the shared stream options appended below.
+    %spec_for = (
+        "read_fasta"              => [ qw( data_in|i=s num|n=s ) ],
+        "read_align"              => [ qw( data_in|i=s num|n=s ) ],
+        "read_tab"                => [ qw( data_in|i=s delimit|d=s cols|c=s keys|k=s skip|s=s num|n=s ) ],
+        "read_psl"                => [ qw( data_in|i=s num|n=s ) ],
+        "read_bed"                => [ qw( data_in|i=s num|n=s ) ],
+        "read_blast_tab"          => [ qw( data_in|i=s num|n=s ) ],
+        "read_embl"               => [ qw( data_in|i=s num|n=s keys|k=s feats|f=s quals|q=s ) ],
+        "read_stockholm"          => [ qw( data_in|i=s num|n=s ) ],
+        "read_phastcons"          => [ qw( data_in|i=s num|n=s min|m=s dist|d=s threshold|t=f gap|g=s ) ],
+        "read_soft"               => [ qw( data_in|i=s num|n=s ) ],
+        "read_gff"                => [ qw( data_in|i=s num|n=s ) ],
+        "read_2bit"               => [ qw( data_in|i=s num|n=s no_mask|N ) ],
+        "read_solexa"             => [ qw( data_in|i=s num|n=s quality|q=s ) ],
+        "read_solid"              => [ qw( data_in|i=s num|n=s quality|q=s ) ],
+        "read_mysql"              => [ qw( database|d=s query|q=s user|u=s password|p=s ) ],
+        "count_seq"               => [ qw( no_stream|x data_out|o=s ) ],
+        "length_seq"              => [ qw( no_stream|x data_out|o=s ) ],
+        "oligo_freq"              => [ qw( word_size|w=s all|a ) ],
+        "create_weight_matrix"    => [ qw( percent|p ) ],
+        "transliterate_seq"       => [ qw( search|s=s replace|r=s delete|d=s ) ],
+        "transliterate_vals"      => [ qw( keys|k=s search|s=s replace|r=s delete|d=s ) ],
+        "translate_seq"           => [ qw( frames|f=s ) ],
+        "extract_seq"             => [ qw( beg|b=s end|e=s len|l=s ) ],
+        "get_genome_seq"          => [ qw( genome|g=s chr|c=s beg|b=s end|e=s len|l=s flank|f=s mask|m ) ],
+        "get_genome_align"        => [ qw( genome|g=s chr|c=s beg|b=s end|e=s len|l=s strand|s=s ) ],
+        "get_genome_phastcons"    => [ qw( genome|g=s chr|c=s beg|b=s end|e=s len|l=s flank|f=s ) ],
+        "split_seq"               => [ qw( word_size|w=s uniq|u ) ],
+        "split_bed"               => [ qw( window_size|w=s step_size|s=s ) ],
+        "tile_seq"                => [ qw( identity|i=s supress_indels|s ) ],
+        "invert_align"            => [ qw( soft|s ) ],
+        "patscan_seq"             => [ qw( patterns|p=s patterns_in|P=s comp|c max_hits|h=s max_misses|m=s genome|g=s ) ],
+        "create_blast_db"         => [ qw( no_stream|x database|d=s ) ],
+        "blast_seq"               => [ qw( database|d=s genome|g=s program|p=s e_val|e=f filter|f cpus|c=s no_filter|F ) ],
+        "blat_seq"                => [ qw( genome|g=s tile_size|t=s step_size|s=s min_identity|m=s min_score|M=s one_off|o=s ooc|c ) ],
+        "match_seq"               => [ qw( word_size|w=s direction|d=s ) ],
+        "create_vmatch_index"     => [ qw( index_name|i=s prefix_length|p=s no_stream|x ) ],
+        "vmatch_seq"              => [ qw( genome|g=s index_name|i=s count|c max_hits|m=s hamming_dist|h=s edit_dist|e=s ) ],
+        "write_fasta"             => [ qw( wrap|w=s no_stream|x data_out|o=s compress|Z ) ],
+        "write_align"             => [ qw( wrap|w=s no_stream|x no_ruler|R no_consensus|C data_out|o=s ) ],
+        "write_blast"             => [ qw( no_stream|x data_out|o=s comment|c compress|Z ) ],
+        "write_tab"               => [ qw( no_stream|x data_out|o=s delimit|d=s keys|k=s no_keys|K=s comment|c compress|Z ) ],
+        "write_bed"               => [ qw( no_stream|x data_out|o=s compress|Z ) ],
+        "write_psl"               => [ qw( no_stream|x data_out|o=s compress|Z ) ],
+        "write_2bit"              => [ qw( no_stream|x data_out|o=s no_mask|N ) ],
+        "write_solid"             => [ qw( wrap|w=s no_stream|x data_out|o=s compress|Z ) ],
+        "plot_seqlogo"            => [ qw( no_stream|x data_out|o=s ) ],
+        "plot_phastcons_profiles" => [ qw( no_stream|x data_out|o=s genome|g=s mean|m median|M flank|f=s
+                                           terminal|t=s title|T=s xlabel|X=s ylabel|Y=s ) ],
+        "analyze_vals"            => [ qw( no_stream|x keys|k=s ) ],
+        "head_records"            => [ qw( num|n=s ) ],
+        "remove_keys"             => [ qw( keys|k=s save_keys|K=s ) ],
+        "rename_keys"             => [ qw( keys|k=s ) ],
+        "uniq_vals"               => [ qw( key|k=s invert|i ) ],
+        "merge_vals"              => [ qw( keys|k=s delimit|d=s ) ],
+        "grab"                    => [ qw( patterns|p=s patterns_in|P=s regex|r=s eval|e=s exact_in|E=s invert|i
+                                           case_insensitive|c keys|k=s keys_only|K vals_only|V ) ],
+        "compute"                 => [ qw( eval|e=s ) ],
+        "add_ident"               => [ qw( prefix|p=s key|k=s ) ],
+        "count_records"           => [ qw( no_stream|x data_out|o=s ) ],
+        "random_records"          => [ qw( num|n=s ) ],
+        "sort_records"            => [ qw( reverse|r keys|k=s ) ],
+        "count_vals"              => [ qw( keys|k=s ) ],
+        "plot_histogram"          => [ qw( no_stream|x data_out|o=s terminal|t=s title|T=s xlabel|X=s ylabel|Y=s
+                                           key|k=s sort|s=s ) ],
+        "plot_lendist"            => [ qw( no_stream|x data_out|o=s terminal|t=s title|T=s xlabel|X=s ylabel|Y=s key|k=s ) ],
+        "plot_chrdist"            => [ qw( no_stream|x data_out|o=s terminal|t=s title|T=s xlabel|X=s ylabel|Y=s ) ],
+        "plot_karyogram"          => [ qw( no_stream|x data_out|o=s genome|g=s feat_color|f=s ) ],
+        "plot_matches"            => [ qw( no_stream|x data_out|o=s terminal|t=s title|T=s xlabel|X=s ylabel|Y=s
+                                           direction|d=s ) ],
+        "length_vals"             => [ qw( keys|k=s ) ],
+        "sum_vals"                => [ qw( no_stream|x data_out|o=s keys|k=s ) ],
+        "mean_vals"               => [ qw( no_stream|x data_out|o=s keys|k=s ) ],
+        "median_vals"             => [ qw( no_stream|x data_out|o=s keys|k=s ) ],
+        "max_vals"                => [ qw( no_stream|x data_out|o=s keys|k=s ) ],
+        "min_vals"                => [ qw( no_stream|x data_out|o=s keys|k=s ) ],
+        "upload_to_ucsc"          => [ qw( no_stream|x database|d=s table|t=s short_label|s=s long_label|l=s group|g=s
+                                           priority|p=f use_score|u visibility|v=s wiggle|w color|c=s chunk_size|C=s ) ],
+    );
+
+    @options = @{ $spec_for{ $script } } if exists $spec_for{ $script };
+
+    # Options shared by every script.
+    push @options, qw(
+        stream_in|I=s
+        stream_out|O=s
+        verbose
+    );
+
+#    print STDERR Dumper( \@options );
+
+    GetOptions(
+        \%options,
+        @options,
+    );
+
+    # Comma separated list options are handed on as array references.
+    foreach $opt ( qw( cols keys no_keys save_keys quals feats frames ) ) {
+        $options{ $opt } = [ split ",", $options{ $opt } ] if defined $options{ $opt };
+    }
+
+    # ---- check arguments ----
+
+    if ( $options{ 'data_in' } )
+    {
+        $options{ "files" } = &getopt_files( $options{ 'data_in' } );
+
+        &Maasha::Common::error( qq(Argument to --data_in must be a valid file or fileglob expression) ) if scalar @{ $options{ "files" } } == 0;
+    }
+
+    if ( $options{ "cols" } )
+    {
+        foreach $col ( @{ $options{ "cols" } } ) {
+            &Maasha::Common::error( qq(Argument to --cols must be a whole numbers - not "$col") ) if $col !~ /^\d+$/;
+        }
+    }
+
+#    print STDERR Dumper( \%options );
+
+    foreach $opt ( keys %options )
+    {
+        # The real option is named patterns_in; the previous pattern "pattern_in" never matched it.
+        if ( $opt =~ /stream_in|patterns_in|exact_in/ and not -f $options{ $opt } )
+        {
+            &Maasha::Common::error( qq(Argument to --$opt must be a valid file or fileglob expression - not "$options{ $opt }") );
+        }
+        elsif ( $opt =~ /beg|end|word_size|wrap|chunk_size|tile_size|len|prefix_length|num|skip|cpus|window_size|step_size/ and $options{ $opt } !~ /^\d+$/ )
+        {
+            &Maasha::Common::error( qq(Argument to --$opt must be a whole number - not "$options{ $opt }") );
+        }
+        elsif ( $opt =~ /max_hits|max_misses|dist|edit_dist|flank|gap|hamming_dist|priority/ and $options{ $opt } !~ /^-?\d+$/ )
+        {
+            &Maasha::Common::error( qq(Argument to --$opt must be an integer - not "$options{ $opt }") );
+        }
+        elsif ( $opt =~ /identity|threshold/ and $options{ $opt } !~ /^-?(?:\d+(?:\.\d*)?|\.\d+)$/ )
+        {
+            &Maasha::Common::error( qq(Argument to --$opt must be a decimal number - not "$options{ $opt }") );
+        }
+        elsif ( $opt =~ /e_val/ and $options{ $opt } !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/ )
+        {
+            &Maasha::Common::error( qq(Argument to --$opt must be a float - not "$options{ $opt }") );
+        }
+        elsif ( $opt =~ /strand/ and $options{ $opt } !~ /^(\+|-)$/ )
+        {
+            &Maasha::Common::error( qq(Argument to --$opt must be "+" or "-" - not "$options{ $opt }") );
+        }
+        elsif ( $opt eq "genome" )
+        {
+            @genomes = &Maasha::Config::genomes();
+
+            # Compare against each configured genome; a bare "grep EXPR, LIST" with
+            # the option value as EXPR is true for any non-empty list.
+            if ( not grep { $_ eq $options{ $opt } } @genomes ) {
+                &Maasha::Common::error( qq(Genome $options{ $opt } not found in "$ENV{ 'INST_DIR' }/conf/genomes.conf") );
+            }
+        }
+        elsif ( $opt eq "terminal" and not $options{ $opt } =~ /^(svg|post|dumb)/ )
+        {
+            &Maasha::Common::error( qq(Bad --$opt argument "$options{ $opt }") );
+        }
+        elsif ( $opt eq "table" and $options{ $opt } =~ /[-.]/ )
+        {
+            # Reject a hyphen or a dot anywhere in the table name.
+            &Maasha::Common::error( qq(Character '$options{ $opt }' is not allowed in table names) );
+        }
+    }
+
+    &Maasha::Common::error( qq(no --database specified) )                if $script eq "create_blast_db" and not $options{ "database" };
+    &Maasha::Common::error( qq(no --index_name specified) )              if $script eq "create_vmatch_index" and not $options{ "index_name" };
+    &Maasha::Common::error( qq(no --database or --genome specified) )    if $script eq "blast_seq" and not $options{ "genome" } and not $options{ "database" };
+    &Maasha::Common::error( qq(both --database and --genome specified) ) if $script eq "blast_seq" and $options{ "genome" } and $options{ "database" };
+    &Maasha::Common::error( qq(no --index_name or --genome specified) )  if $script eq "vmatch_seq" and not $options{ "genome" } and not $options{ "index_name" };
+    # vmatch_seq declares the option as index_name, so that is the key to test.
+    &Maasha::Common::error( qq(both --index_name and --genome specified) ) if $script eq "vmatch_seq" and $options{ "genome" } and $options{ "index_name" };
+    &Maasha::Common::error( qq(no --genome specified) )                  if $script =~ /get_genome_seq|get_genome_align|get_genome_phastcons|blat_seq|plot_phastcons_profiles|plot_karyogram/ and not $options{ "genome" };
+    &Maasha::Common::error( qq(no --key specified) )                     if $script =~ /plot_lendist|plot_histogram/ and not $options{ "key" };
+    &Maasha::Common::error( qq(no --keys speficied) )                    if $script =~ /sort_records|count_vals|sum_vals|mean_vals|median_vals|length_vals/ and not $options{ "keys" };
+
+    if ( $script eq "upload_to_ucsc" )
+    {
+        &Maasha::Common::error( qq(no --database specified) ) if not $options{ "database" };
+        &Maasha::Common::error( qq(no --table specified) )    if not $options{ "table" };
+    }
+
+    return wantarray ? %options : \%options;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SCRIPTS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub script_print_usage
+{
+    # Martin A. Hansen, January 2008.
+
+    # Reads a usage file consisting of "Field: text" lines and prints the
+    # fields in order of first appearance. A field with a single line is
+    # wrapped next to its name; a field with several lines is printed as a
+    # list below its name. Every literal $script in the file is replaced
+    # with the last element of the path. Exits when done.
+
+    my ( $path,   # full path to usage file
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $script, $fh, $line, @wrapped, @order, %text_for, $field );
+
+    $script = ( split "/", $path )[ -1 ];
+
+    # The program name is always the first field shown.
+    @order    = ( "Program name" );
+    %text_for = ( "Program name" => [ $script ] );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        $line =~ s/\$script/$script/g;
+
+        if ( $line =~ /^([^:]+):\s+(.+)$/ )
+        {
+            my ( $name, $text ) = ( $1, $2 );
+
+            push @order, $name if not exists $text_for{ $name };
+            push @{ $text_for{ $name } }, $text;
+        }
+    }
+
+    close $fh;
+
+    print "\n";
+
+    foreach $field ( @order )
+    {
+        if ( scalar @{ $text_for{ $field } } == 1 )
+        {
+            @wrapped = &Maasha::Common::wrap_line( $text_for{ $field }->[ 0 ], 80 );
+
+            printf( "%-15s%s\n", "$field:", shift @wrapped );
+
+            # Continuation lines are aligned under the first one.
+            printf( "%-15s%s\n", "", $_ ) foreach @wrapped;
+        }
+        else
+        {
+            print "$field:\n";
+
+            print " $_\n" foreach @{ $text_for{ $field } };
+        }
+
+        print "\n";
+    }
+
+    exit;
+}
+
+
+sub script_list_biotools
+{
+    # Martin A. Hansen, January 2008.
+
+    # Goes through the usage files in the given directory in sorted order
+    # and prints the wrapped text of each "Description:" line next to the
+    # file's name. Exits when done.
+
+    my ( $path,   # full path to usage directory
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $usage_file, $fh, $line, @wrapped, $program );
+
+    foreach $usage_file ( sort &Maasha::Common::ls_files( $path ) )
+    {
+        $program = ( split "/", $usage_file )[ -1 ];
+
+        $fh = &Maasha::Common::read_open( $usage_file );
+
+        while ( $line = <$fh> )
+        {
+            chomp $line;
+
+            next if $line !~ /^Description:\s+(.+)/;
+
+            @wrapped = &Maasha::Common::wrap_line( $1, 60 );
+
+            printf( "%-30s%s\n", $program, shift @wrapped );
+
+            # Continuation lines are aligned under the first one.
+            printf( "%-30s%s\n", "", $_ ) foreach @wrapped;
+        }
+
+        close $fh;
+    }
+
+    exit;
+}
+
+
+sub script_read_fasta
+{
+    # Martin A. Hansen, August 2007.
+
+    # Passes records from the in stream through to the out stream, then
+    # emits one record per FASTA entry read from the files listed in
+    # $options->{ "files" }. Reading stops after --num entries, if given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $path, $data_in, $fasta, $count );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( $fasta = &Maasha::Fasta::get_entry( $data_in ) )
+        {
+            # Entries without a name or with an empty sequence are not emitted,
+            # but they still count towards --num.
+            if ( defined $fasta->[ SEQ_NAME ] and $fasta->[ SEQ ] )
+            {
+                &put_record(
+                    {
+                        SEQ_NAME => $fasta->[ SEQ_NAME ],
+                        SEQ      => $fasta->[ SEQ ],
+                        SEQ_LEN  => length $fasta->[ SEQ ],
+                    },
+                    $out
+                );
+            }
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_align
+{
+    # Martin A. Hansen, August 2007.
+
+    # Passes records from the in stream through to the out stream, then
+    # emits one alignment record (ALIGN set to 1) per FASTA entry read from
+    # the files in $options->{ "files" }. Reading stops after --num entries,
+    # if given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $fasta, $record, $path, $data_in, $count );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( $fasta = &Maasha::Fasta::get_entry( $data_in ) )
+        {
+            # Entries lacking a true name or sequence are skipped, but still counted.
+            if ( $fasta->[ SEQ_NAME ] and $fasta->[ SEQ ] )
+            {
+                &put_record(
+                    {
+                        ALIGN     => 1,
+                        SEQ_NAME  => $fasta->[ SEQ_NAME ],
+                        SEQ       => $fasta->[ SEQ ],
+                        ALIGN_LEN => length $fasta->[ SEQ ],
+                    },
+                    $out
+                );
+            }
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_tab
+{
+    # Martin A. Hansen, August 2007.
+
+    # Passes records from the in stream through to the out stream, then
+    # turns each line of the files in $options->{ "files" } into a record.
+    # Lines are split on the --delimit pattern (default '\s+'), optionally
+    # reduced to the --cols columns, and stored under the --keys names or
+    # V0, V1, ... where no key name is given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $path, $row, @cells, @picked, $idx, $record, $data_in, $skip, $count );
+
+    $options->{ 'delimit' } ||= '\s+';
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    # The skip counter is shared by all files; it is not reset per file.
+    $skip  = $options->{ 'skip' } ||= 0;
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        LINE:
+        while ( $row = <$data_in> )
+        {
+            if ( $skip )
+            {
+                $skip--;
+                next LINE;
+            }
+
+            # Comment lines and empty lines carry no data.
+            next LINE if $row =~ /^#|^$/;
+
+            chomp $row;
+
+            @cells = split /$options->{'delimit'}/, $row;
+
+            if ( $options->{ "cols" } ) {
+                @picked = @cells[ @{ $options->{ "cols" } } ];
+            } else {
+                @picked = @cells;
+            }
+
+            undef $record;
+
+            foreach $idx ( 0 .. $#picked )
+            {
+                if ( $options->{ "keys" }->[ $idx ] ) {
+                    $record->{ $options->{ "keys" }->[ $idx ] } = $picked[ $idx ];
+                } else {
+                    $record->{ "V" . $idx } = $picked[ $idx ];
+                }
+            }
+
+            &put_record( $record, $out );
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_psl
+{
+    # Martin A. Hansen, August 2007.
+
+    # Passes records from the in stream through to the out stream, then
+    # emits every entry that Maasha::UCSC::psl_get_entries returns for each
+    # of the files in $options->{ "files" }. Stops after --num entries, if
+    # given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $path, $psl_entries, $psl, $count );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $psl_entries = &Maasha::UCSC::psl_get_entries( $path );
+
+        foreach $psl ( @{ $psl_entries } )
+        {
+            &put_record( $psl, $out );
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+    }
+}
+
+
+sub script_read_bed
+{
+    # Martin A. Hansen, August 2007.
+
+    # Passes records from the in stream through to the out stream, then
+    # emits every entry read with Maasha::UCSC::bed_get_entry from the files
+    # in $options->{ "files" }. Stops after --num entries, if given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $path, $record, $bed, $data_in, $count );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( $bed = &Maasha::UCSC::bed_get_entry( $data_in ) )
+        {
+            &put_record( $bed, $out );
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_blast_tab
+{
+    # Martin A. Hansen, September 2007.
+
+    # Read tabular BLAST output from NCBI blast run with -m8 or -m9.
+    #
+    # Records from the in stream are passed through first. Each tab
+    # separated, non-comment line of the files in $options->{ "files" } then
+    # becomes a BLAST record with positions shifted by -1. When the subject
+    # begin lies after the subject end, the two are swapped and the strand
+    # is set to '-'.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $path, $row, @cols, $record, $data_in, $count );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        LINE:
+        while ( $row = <$data_in> )
+        {
+            chomp $row;
+
+            next LINE if $row =~ /^#/;
+
+            @cols = split /\t/, $row;
+
+            $record = {
+                "REC_TYPE"   => "BLAST",
+                "Q_ID"       => $cols[ 0 ],
+                "S_ID"       => $cols[ 1 ],
+                "IDENT"      => $cols[ 2 ],
+                "ALIGN_LEN"  => $cols[ 3 ],
+                "MISMATCHES" => $cols[ 4 ],
+                "GAPS"       => $cols[ 5 ],
+                "Q_BEG"      => $cols[ 6 ] - 1,   # BLAST is 1-based
+                "Q_END"      => $cols[ 7 ] - 1,   # BLAST is 1-based
+                "S_BEG"      => $cols[ 8 ] - 1,   # BLAST is 1-based
+                "S_END"      => $cols[ 9 ] - 1,   # BLAST is 1-based
+                "E_VAL"      => $cols[ 10 ],
+                "BIT_SCORE"  => $cols[ 11 ],
+            };
+
+            if ( $record->{ "S_BEG" } > $record->{ "S_END" } )
+            {
+                # Subject coordinates run backwards: report on the minus strand
+                # with begin and end swapped.
+                $record->{ "STRAND" } = '-';
+
+                @{ $record }{ "S_BEG", "S_END" } = @{ $record }{ "S_END", "S_BEG" };
+            }
+            else
+            {
+                $record->{ "STRAND" } = '+';
+            }
+
+            &put_record( $record, $out );
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_embl
+{
+    # Martin A. Hansen, August 2007.
+
+    # Read EMBL format.
+    #
+    # Records from the in stream are passed through first. For every EMBL
+    # entry in the files in $options->{ "files" } one record without the
+    # feature table is emitted, followed by one record per feature with the
+    # feature's qualifiers stored under upper cased keys.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( %wanted, $kind, $path, $data_in, $count, $embl, $record );
+
+    # Turn the keys/feats/quals lists into lookup hashes for the parser.
+    foreach $kind ( "keys", "feats", "quals" ) {
+        $wanted{ $kind }{ $_ } = 1 foreach @{ $options->{ $kind } };
+    }
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( $embl = &Maasha::EMBL::get_embl_entry( $data_in ) )
+        {
+            my ( $type, $feature, $qual, $tag, $feat_record );
+
+            $record = &Maasha::EMBL::parse_embl_entry( $embl, \%wanted );
+
+            # First the entry itself, minus its feature table ...
+            $feat_record = dclone( $record );
+
+            delete $feat_record->{ "FT" };
+
+            &put_record( $feat_record, $out );
+
+            # ... then the same copy, minus the sequence, once per feature.
+            # NOTE(review): qualifiers written for one feature stay in the copy
+            # for the features that follow - confirm this is intended.
+            delete $feat_record->{ "SEQ" };
+
+            foreach $type ( keys %{ $record->{ "FT" } } )
+            {
+                $feat_record->{ "FEAT_TYPE" } = $type;
+
+                foreach $feature ( @{ $record->{ "FT" }->{ $type } } )
+                {
+                    foreach $qual ( keys %{ $feature } )
+                    {
+                        # Key name: leading underscore removed, upper cased.
+                        ( $tag = $qual ) =~ s/^_//;
+
+                        $feat_record->{ uc $tag } = join "; ", @{ $feature->{ $qual } };
+                    }
+
+                    &put_record( $feat_record, $out );
+                }
+            }
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_stockholm
+{
+    # Martin A. Hansen, August 2007.
+
+    # Read Stockholm format.
+    #
+    # Records from the in stream are passed through first. For every
+    # Stockholm entry in the files in $options->{ "files" } one annotation
+    # record holding the entry's GF values is emitted, followed by one
+    # record per aligned sequence. All records of an entry carry the entry's
+    # running number under ALIGN.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $data_in, $path, $count, $stockholm, $record, $anno, $tag, $row );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( $stockholm = &Maasha::Stockholm::get_stockholm_entry( $data_in ) )
+        {
+            $record = &Maasha::Stockholm::parse_stockholm_entry( $stockholm );
+
+            # Annotation record: a copy of the GF values plus the entry number.
+            undef $anno;
+
+            foreach $tag ( keys %{ $record->{ "GF" } } ) {
+                $anno->{ $tag } = $record->{ "GF" }->{ $tag };
+            }
+
+            $anno->{ "ALIGN" } = $count;
+
+            &put_record( $anno, $out );
+
+            # One record per aligned sequence, given as [ name, sequence ].
+            foreach $row ( @{ $record->{ "ALIGN" } } )
+            {
+                &put_record(
+                    {
+                        ALIGN    => $count,
+                        SEQ_NAME => $row->[ 0 ],
+                        SEQ      => $row->[ 1 ],
+                    },
+                    $out
+                );
+            }
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_phastcons
+{
+    # Martin A. Hansen, December 2007.
+
+    # Read PhastCons format.
+    #
+    # Records from the in stream are passed through first. Each PhastCons
+    # entry in the files in $options->{ "files" } is parsed into records
+    # which are emitted as BED records with BED_LEN added. Stops after --num
+    # records, if given. Unset (or false) min, dist, threshold and gap
+    # options get the defaults 10, 25, 0.8 and 5.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $data_in, $path, $count, $phastcons, @blocks, $record );
+
+    $options->{ "min" }       ||= 10;
+    $options->{ "dist" }      ||= 25;
+    $options->{ "threshold" } ||= 0.8;
+    $options->{ "gap" }       ||= 5;
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( $phastcons = &Maasha::UCSC::phastcons_get_entry( $data_in ) )
+        {
+            @blocks = &Maasha::UCSC::phastcons_parse_entry( $phastcons, $options );
+
+            foreach $record ( @blocks )
+            {
+                $record->{ "REC_TYPE" } = "BED";
+                $record->{ "BED_LEN" }  = $record->{ "CHR_END" } - $record->{ "CHR_BEG" } + 1;
+
+                &put_record( $record, $out );
+
+                last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+                $count++;
+            }
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_soft
+{
+    # Martin A. Hansen, December 2007.
+
+    # Read soft format.
+    # http://www.ncbi.nlm.nih.gov/geo/info/soft2.html
+    #
+    # Records from the in stream are passed through first. Each file in
+    # $options->{ "files" } is indexed, its platform table is read, and the
+    # records of every sample are emitted. Stops after --num records, if
+    # given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $path, $count, $sample_records, $record, $index, $fh, @platforms, $plat_table, @samples, $sample, $prev_end );
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $index = &Maasha::NCBI::soft_index_file( $path );
+
+        $fh = &Maasha::Common::read_open( $path );
+
+        # Index rows are used as [ type, begin, end ].
+        @platforms = grep { $_->[ 0 ] =~ /PLATFORM/ } @{ $index };
+        @samples   = grep { $_->[ 0 ] =~ /SAMPLE/ } @{ $index };
+
+        $plat_table = &Maasha::NCBI::soft_get_platform( $fh, $platforms[ 0 ]->[ 1 ], $platforms[ -1 ]->[ 2 ] );
+
+        $prev_end = $platforms[ -1 ]->[ 2 ];
+
+        foreach $sample ( @samples )
+        {
+            # Sample positions are passed relative to the end of the section read before.
+            $sample_records = &Maasha::NCBI::soft_get_sample( $fh, $plat_table, $sample->[ 1 ] - $prev_end - 1, $sample->[ 2 ] - $prev_end - 1 );
+
+            foreach $record ( @{ $sample_records } )
+            {
+                &put_record( $record, $out );
+
+                last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+                $count++;
+            }
+
+            $prev_end = $sample->[ 2 ];
+        }
+
+        close $fh;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $fh if $fh;
+}
+
+
+sub script_read_gff
+{
+    # Martin A. Hansen, February 2008.
+
+    # Read GFF format.
+    #
+    # Records from the in stream are passed through first. Every entry read
+    # with Maasha::GFF::get_entry from the files in $options->{ "files" } is
+    # then emitted as a record. Stops after --num entries, if given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $file, $fh, $num, $record, $entry );
+
+    while ( $record = &get_record( $in ) ) {
+        &put_record( $record, $out );
+    }
+
+    $num = 1;
+
+    FILE:
+    foreach $file ( @{ $options->{ "files" } } )
+    {
+        $fh = &Maasha::Common::read_open( $file );
+
+        while ( $entry = &Maasha::GFF::get_entry( $fh ) )
+        {
+            &put_record( $entry, $out );
+
+            if ( $options->{ "num" } and $num == $options->{ "num" } )
+            {
+                # Close the file being read before leaving early, so the
+                # handle does not stay open.
+                close $fh;
+
+                last FILE;
+            }
+
+            $num++;
+        }
+
+        close $fh;
+    }
+}
+
+
+sub script_read_2bit
+{
+    # Martin A. Hansen, March 2008.
+
+    # Read sequences from 2bit file.
+    #
+    # Records from the in stream are passed through first. For every line
+    # of the table of contents of each file in $options->{ "files" } the
+    # sequence is fetched and emitted as a record. The mask flag is passed
+    # to the sequence reader unless --no_mask is given. Stops after --num
+    # sequences, if given.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $path, $data_in, $mask, $toc, $toc_line, $seq, $count );
+
+    $mask = $options->{ "no_mask" } ? undef : 1;
+
+    &put_record( $record, $out ) while $record = &get_record( $in );
+
+    $count = 1;
+
+    FILE:
+    foreach $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        $toc = &Maasha::TwoBit::twobit_get_TOC( $data_in );
+
+        # TOC lines are used as [ name, second argument to twobit_get_seq ].
+        foreach $toc_line ( @{ $toc } )
+        {
+            $seq = &Maasha::TwoBit::twobit_get_seq( $data_in, $toc_line->[ 1 ], undef, undef, $mask );
+
+            $record = {
+                "SEQ_NAME" => $toc_line->[ 0 ],
+                "SEQ"      => $seq,
+                "SEQ_LEN"  => length $seq,
+            };
+
+            &put_record( $record, $out );
+
+            last FILE if $options->{ "num" } and $count == $options->{ "num" };
+
+            $count++;
+        }
+
+        close $data_in;
+    }
+
+    # Reached both on normal completion and on early exit.
+    close $data_in if $data_in;
+}
+
+
+sub script_read_solexa
+{
+    # Martin A. Hansen, March 2008.
+
+    # Pass the incoming stream through, then turn every line of the given
+    # Solexa files into a sequence record. Each line is split on ':'; the
+    # sixth field is taken as the residues and the last field as the space
+    # separated scores. Residues whose score is below the "quality" option
+    # (default 20) are lowercased. Names are built from the file basename
+    # (up to the first dot) and a per-file counter. The "num" option caps
+    # the number of records emitted.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my ( $data_in, $read );
+
+    $options->{ "quality" } ||= 20;
+
+    while ( my $passed = &get_record( $in ) ) {
+        &put_record( $passed, $out );
+    }
+
+    my $emitted = 1;
+
+    FILE:
+    for my $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        my $prefix = &Maasha::Common::get_basename( $path );
+        $prefix =~ s/\..*//;
+
+        my $serial = 0;
+
+        while ( my $line = <$data_in> )
+        {
+            my @fields   = split /:/, $line;
+            my @residues = split //, $fields[ 5 ];
+            my @scores   = split / /, $fields[ -1 ];
+
+            for my $pos ( 0 .. $#scores ) {
+                $residues[ $pos ] = lc $residues[ $pos ] if $scores[ $pos ] < $options->{ "quality" };
+            }
+
+            my $seq = join "", @residues;
+
+            # The same hash is refilled for every read.
+            $read->{ "SEQ_NAME" }   = sprintf( "%s_ID%08d", $prefix, $serial );
+            $read->{ "SEQ" }        = $seq;
+            $read->{ "SEQ_LEN" }    = length $seq;
+            $read->{ "SCORE_MEAN" } = sprintf ( "%.2f", &Maasha::Calc::mean( \@scores ) );
+
+            &put_record( $read, $out );
+
+            last FILE if $options->{ "num" } and $emitted == $options->{ "num" };
+
+            $serial++;
+            $emitted++;
+        }
+
+        close $data_in;
+    }
+
+    close $data_in if $data_in;
+}
+
+
+sub script_read_solid
+{
+    # Martin A. Hansen, April 2008.
+
+    # Pass the incoming stream through, then read tab separated lines of
+    # name, color space sequence and comma separated qualities from the
+    # given files. The color space string is decoded with
+    # Maasha::Solid::color_space2seq and residues scoring below the
+    # "quality" option (default 15) are lowercased. The "num" option caps
+    # the number of records emitted.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my $data_in;
+
+    $options->{ "quality" } ||= 15;
+
+    while ( my $passed = &get_record( $in ) ) {
+        &put_record( $passed, $out );
+    }
+
+    my $emitted = 1;
+
+    FILE:
+    for my $path ( @{ $options->{ "files" } } )
+    {
+        $data_in = &Maasha::Common::read_open( $path );
+
+        while ( my $line = <$data_in> )
+        {
+            chomp $line;
+
+            my ( $seq_name, $seq_cs, $seq_qual ) = split /\t/, $line;
+
+            my @scores   = split /,/, $seq_qual;
+            my $decoded  = &Maasha::Solid::color_space2seq( $seq_cs );
+            my @residues = split //, $decoded;
+
+            for my $pos ( 0 .. $#residues ) {
+                $residues[ $pos ] = lc $residues[ $pos ] if $scores[ $pos ] < $options->{ "quality" };
+            }
+
+            my $read = {
+                SEQ_NAME   => $seq_name,
+                SEQ_CS     => $seq_cs,
+                SEQ_QUAL   => $seq_qual,
+                SEQ_LEN    => length $seq_cs,
+                SEQ        => join( "", @residues ),
+                SCORE_MEAN => sprintf( "%.2f", &Maasha::Calc::mean( \@scores ) ),
+            };
+
+            &put_record( $read, $out );
+
+            last FILE if $options->{ "num" } and $emitted == $options->{ "num" };
+
+            $emitted++;
+        }
+
+        close $data_in;
+    }
+
+    close $data_in if $data_in;
+}
+
+
+sub script_read_mysql
+{
+    # Martin A. Hansen, May 2008.
+
+    # Read the result of a MySQL query into the stream.
+    # Records already in the stream are passed through first. The query in
+    # $options->{ "query" } is then run against $options->{ "database" }
+    # and every resulting row (a hashref) is emitted as a record. User and
+    # password default to the values supplied by Maasha::UCSC.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $dbh, $results, $row );
+
+    $options->{ "user" }     ||= &Maasha::UCSC::ucsc_get_user();
+    $options->{ "password" } ||= &Maasha::UCSC::ucsc_get_password();
+
+    while ( $record = &get_record( $in ) ) {
+        &put_record( $record, $out );
+    }
+
+    $dbh = &Maasha::SQL::connect( $options->{ "database" }, $options->{ "user" }, $options->{ "password" } );
+
+    $results = &Maasha::SQL::query_hashref_list( $dbh, $options->{ "query" } );
+
+    &Maasha::SQL::disconnect( $dbh );
+
+    # Write the rows to the same out stream as the passed-through records.
+    foreach $row ( @{ $results } ) {
+        &put_record( $row, $out );
+    }
+}
+
+
+sub script_count_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Count the records in the stream that carry a true SEQ value.
+    # Records are passed on unless the "no_stream" option is set. The
+    # tally is written as a single record { count_seq => N } to the
+    # handle obtained from write_stream for $options->{ "data_out" }.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my $count = 0;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        $count++ if $record->{ "SEQ" };
+
+        &put_record( $record, $out ) unless $options->{ "no_stream" };
+    }
+
+    my $fh = &write_stream( $options->{ "data_out" } );
+
+    &put_record( { "count_seq" => $count }, $fh );
+
+    close $fh;
+}
+
+
+sub script_length_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Determine the length of sequences in stream.
+    # Every record with a true SEQ gets a SEQ_LEN key. Records are passed
+    # on unless the "no_stream" option is set, and a final record holding
+    # the summed length as TOTAL_SEQ_LEN is always emitted.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $total );
+
+    # Start at zero so a stream without sequences reports 0, not undef.
+    $total = 0;
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            $record->{ "SEQ_LEN" } = length $record->{ "SEQ" };
+            $total += $record->{ "SEQ_LEN" };
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    &put_record( { TOTAL_SEQ_LEN => $total }, $out );
+}
+
+
+sub script_uppercase_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Convert the SEQ value of every record in the stream to uppercase.
+    # Records without a true SEQ are passed on untouched.
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } ) {
+            $record->{ "SEQ" } = uc $record->{ "SEQ" };
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_shuffle_seq
+{
+    # Martin A. Hansen, December 2007.
+
+    # Replace the SEQ value of every record in the stream with the result
+    # of Maasha::Seq::seq_shuffle. Records without a true SEQ are passed
+    # on untouched.
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } ) {
+            $record->{ "SEQ" } = &Maasha::Seq::seq_shuffle( $record->{ "SEQ" } );
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_analyze_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Run Maasha::Seq::seq_analyze on the SEQ of every record in the
+    # stream and copy all key/value pairs of the returned hash into the
+    # record (existing keys of the same name are overwritten).
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            my $analysis = &Maasha::Seq::seq_analyze( $record->{ "SEQ" } );
+
+            foreach my $field ( keys %{ $analysis } ) {
+                $record->{ $field } = $analysis->{ $field };
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_analyze_tags
+{
+    # Martin A. Hansen, August 2008.
+
+    # Summarise sequence tags in the stream by length.
+    # A tag is a record with SEQ_NAME and SEQ, or else with Q_ID and
+    # BED_LEN, whose name ends in "_<digits>"; the digits are taken as the
+    # clone count. For every tag length one record with TAG_LEN, TAG_COUNT
+    # and TAG_CLONES is emitted, in ascending length order.
+    # Incoming records are consumed and not passed on.
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    my ( %len_hash, %clone_hash, $tag_record );
+
+    while ( my $record = &get_record( $in ) )
+    {
+        my ( $name, $tag_len );
+
+        if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        {
+            $name    = $record->{ "SEQ_NAME" };
+            $tag_len = length( $record->{ "SEQ" } );
+        }
+        elsif ( $record->{ "Q_ID" } and $record->{ "BED_LEN" } )
+        {
+            $name    = $record->{ "Q_ID" };
+            $tag_len = $record->{ "BED_LEN" };
+        }
+        else
+        {
+            next;
+        }
+
+        if ( $name =~ /_(\d+)$/ )
+        {
+            my $clones = $1;
+
+            $len_hash{ $tag_len }++;
+            $clone_hash{ $tag_len } += $clones;
+        }
+    }
+
+    for my $tag_len ( sort { $a <=> $b } keys %len_hash )
+    {
+        # The same hash is refilled for every length.
+        $tag_record->{ "TAG_LEN" }    = $tag_len;
+        $tag_record->{ "TAG_COUNT" }  = $len_hash{ $tag_len };
+        $tag_record->{ "TAG_CLONES" } = $clone_hash{ $tag_len };
+
+        &put_record( $tag_record, $out );
+    }
+}
+
+
+sub script_complexity_seq
+{
+    # Martin A. Hansen, May 2008.
+
+    # Add a SEQ_COMPLEXITY key to every record with a true SEQ. The value
+    # is the result of Maasha::Seq::seq_complexity formatted with two
+    # decimals (per the original author: the most common di-residue over
+    # the sequence length).
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            my $complexity = &Maasha::Seq::seq_complexity( $record->{ "SEQ" } );
+
+            $record->{ "SEQ_COMPLEXITY" } = sprintf( "%.2f", $complexity );
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_oligo_freq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Count oligos (words of "word_size" residues, default 7) in the
+    # sequences in stream. Without the "all" option a frequency table is
+    # emitted after each sequence and the counts are reset; with "all" the
+    # counts accumulate and one table is emitted at the end. Every
+    # incoming record is passed on as well.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my %oligos;
+
+    $options->{ "word_size" } ||= 7;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            foreach my $oligo ( &Maasha::Seq::seq2oligos( \$record->{ "SEQ" }, $options->{ "word_size" } ) ) {
+                $oligos{ $oligo }++;
+            }
+
+            unless ( $options->{ "all" } )
+            {
+                my @freq_table = &Maasha::Seq::oligo_freq( \%oligos );
+
+                foreach my $freq_record ( @freq_table ) {
+                    &put_record( $freq_record, $out );
+                }
+
+                undef %oligos;
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+
+    if ( $options->{ "all" } )
+    {
+        my @freq_table = &Maasha::Seq::oligo_freq( \%oligos );
+
+        foreach my $freq_record ( @freq_table ) {
+            &put_record( $freq_record, $out );
+        }
+    }
+}
+
+
+sub script_create_weight_matrix
+{
+    # Martin A. Hansen, August 2007.
+
+    # Create a weight matrix from the sequences (an alignment) in stream.
+    # Records without SEQ are passed on; sequence records are consumed.
+    # One record is emitted per residue seen: V0 holds the residue and
+    # V1..Vn its count at each position, or, with the "percent" option,
+    # the count as a rounded percentage of the number of sequences.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my ( %freq_hash, %res_hash );
+
+    my $count = 0;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        unless ( $record->{ "SEQ" } )
+        {
+            &put_record( $record, $out );
+
+            next;
+        }
+
+        my $pos = 0;
+
+        foreach my $res ( split //, $record->{ "SEQ" } )
+        {
+            $freq_hash{ $pos }{ $res }++;
+            $res_hash{ $res } = 1;
+
+            $pos++;
+        }
+
+        $count++;
+    }
+
+    # Positions are keyed 0 .. n-1, so the key count is the matrix width.
+    my $columns = keys %freq_hash;
+
+    foreach my $res ( sort keys %res_hash )
+    {
+        my $row = {};
+
+        $row->{ "V0" } = $res;
+
+        foreach my $col ( 0 .. $columns - 1 )
+        {
+            my $freq = $freq_hash{ $col }{ $res } || 0;
+
+            if ( $options->{ "percent" } and $freq > 0 ) {
+                $freq = sprintf( "%.0f", 100 * $freq / $count );
+            }
+
+            $row->{ "V" . ( $col + 1 ) } = $freq;
+        }
+
+        &put_record( $row, $out );
+    }
+}
+
+
+sub script_calc_bit_scores
+{
+    # Martin A. Hansen, March 2007.
+
+    # Calculates the bit scores for each position from an alignment in the stream.
+    # Records without SEQ are passed on; sequence records are consumed and
+    # a single record with keys V0..Vn-1 is emitted, where each value is
+    # the maximum bit value (4 for protein, 2 otherwise) minus the bit
+    # height reported by Maasha::Seq::seqlogo_calc_bit_height.
+    # Gap characters (- _ ~ .) are not counted.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $type, $count, $width, $i, $res, %freq_hash, $bit_max, $bit_height, $bit_diff );
+
+    $count = 0;
+    $width = 0;
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            $type = &Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type;
+
+            # Track the alignment width explicitly: gap-only columns leave
+            # no key in %freq_hash, so counting keys would drop columns.
+            $width = length $record->{ "SEQ" } if length $record->{ "SEQ" } > $width;
+
+            for ( $i = 0; $i < length $record->{ "SEQ" }; $i++ )
+            {
+                $res = substr $record->{ "SEQ" }, $i, 1;
+
+                next if $res =~ /-|_|~|\./;
+
+                $freq_hash{ $i }{ $res }++;
+            }
+
+            $count++;
+        }
+        else
+        {
+            &put_record( $record, $out );
+        }
+    }
+
+    # No sequences seen: there is nothing to score and no type to test.
+    return if $count == 0;
+
+    undef $record;
+
+    if ( $type eq "protein" ) {
+        $bit_max = 4;
+    } else {
+        $bit_max = 2;
+    }
+
+    for ( $i = 0; $i < $width; $i++ )
+    {
+        # NOTE(review): a gap-only column is passed as an empty hash -
+        # confirm seqlogo_calc_bit_height handles that as intended.
+        $bit_height = &Maasha::Seq::seqlogo_calc_bit_height( $freq_hash{ $i } || {}, $count );
+
+        $bit_diff = $bit_max - $bit_height;
+
+        $record->{ "V" . ( $i ) } = sprintf( "%.2f", $bit_diff );
+    }
+
+    &put_record( $record, $out );
+}
+
+
+sub script_reverse_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Reverse the SEQ string of every record in the stream. Records
+    # without a true SEQ are passed on untouched.
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    while ( my $record = &get_record( $in ) )
+    {
+        # Scalar assignment makes reverse flip the string, not a list.
+        $record->{ "SEQ" } = reverse $record->{ "SEQ" } if $record->{ "SEQ" };
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_complement_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Complement the SEQ of every record in the stream in place. The
+    # sequence type is guessed once, from the first sequence seen, and
+    # reused for the rest of the stream; sequences are only changed when
+    # that type is "rna" or "dna".
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    my $type;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            $type ||= &Maasha::Seq::seq_guess_type( $record->{ "SEQ" } );
+
+            &Maasha::Seq::rna_comp( \$record->{ "SEQ" } ) if $type eq "rna";
+            &Maasha::Seq::dna_comp( \$record->{ "SEQ" } ) if $type eq "dna";
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_remove_indels
+{
+    # Martin A. Hansen, August 2007.
+
+    # Strip the indel characters '-', '~' and '.' from the SEQ of every
+    # record in the stream. Records without a true SEQ are passed on
+    # untouched.
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } ) {
+            $record->{ 'SEQ' } =~ tr/-~.//d;
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_transliterate_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Transliterate chars from sequence in record.
+    # With both "search" and "replace" options the SEQ of each record is
+    # run through tr/search/replace/; otherwise, with a "delete" option,
+    # through tr/delete//d. All records are passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $search, $replace, $delete, $code, $transliterate );
+
+    $search  = $options->{ "search" }  || "";
+    $replace = $options->{ "replace" } || "";
+    $delete  = $options->{ "delete" }  || "";
+
+    if ( $search and $replace ) {
+        $code = "tr/$search/$replace/";
+    } elsif ( $delete ) {
+        $code = "tr/$delete//d";
+    }
+
+    if ( $code )
+    {
+        # tr/// operands are fixed at compile time, hence the string eval.
+        # It is compiled once here instead of once per record.
+        # SECURITY: the option values become Perl code - only use with
+        # trusted command line input.
+        $transliterate = eval "sub { \$_[ 0 ] =~ $code }";
+
+        warn "transliterate_seq: cannot compile '$code': $@" if not $transliterate;
+    }
+
+    while ( $record = &get_record( $in ) )
+    {
+        # @_ aliases the hash element, so SEQ is modified in place.
+        $transliterate->( $record->{ "SEQ" } ) if $transliterate and $record->{ "SEQ" };
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_transliterate_vals
+{
+    # Martin A. Hansen, April 2008.
+
+    # Transliterate chars from values in record.
+    # For each key listed in $options->{ "keys" } that exists in a record,
+    # the value is run through tr/search/replace/ when both "search" and
+    # "replace" are given, or through tr/delete//d when "delete" is given.
+    # All records are passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $search, $replace, $delete, $key, $code, $transliterate );
+
+    $search  = $options->{ "search" }  || "";
+    $replace = $options->{ "replace" } || "";
+    $delete  = $options->{ "delete" }  || "";
+
+    if ( $search and $replace ) {
+        $code = "tr/$search/$replace/";
+    } elsif ( $delete ) {
+        $code = "tr/$delete//d";
+    }
+
+    if ( $code )
+    {
+        # tr/// operands are fixed at compile time, hence the string eval.
+        # Only the tr/// is evaluated; the key is never part of the code,
+        # so keys that are not plain identifiers work too.
+        # SECURITY: the option values become Perl code - only use with
+        # trusted command line input.
+        $transliterate = eval "sub { \$_[ 0 ] =~ $code }";
+
+        warn "transliterate_vals: cannot compile '$code': $@" if not $transliterate;
+    }
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $transliterate )
+        {
+            foreach $key ( @{ $options->{ "keys" } } )
+            {
+                # @_ aliases the hash element, so the value is modified in place.
+                $transliterate->( $record->{ $key } ) if exists $record->{ $key };
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_translate_seq
+{
+    # Martin A. Hansen, February 2008.
+
+    # Translate DNA sequence into protein sequence.
+    # For every record whose SEQ is guessed to be DNA, one copy of the
+    # record is emitted per reading frame in $options->{ "frames" }
+    # (default all six), with SEQ replaced by the translation, SEQ_LEN set
+    # to the length of the translation and FRAME set to the frame.
+    # Records without SEQ are passed on. Records whose SEQ is not guessed
+    # to be DNA are not emitted.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $frame, %new_record );
+
+    $options->{ "frames" } ||= [ 1, 2, 3, -1, -2, -3 ];
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            if ( &Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) eq "dna" )
+            {
+                foreach $frame ( @{ $options->{ "frames" } } )
+                {
+                    %new_record = %{ $record };
+
+                    $new_record{ "SEQ" }     = &Maasha::Seq::translate( $record->{ "SEQ" }, $frame );
+                    $new_record{ "SEQ_LEN" } = length $new_record{ "SEQ" };   # length of the emitted SEQ
+                    $new_record{ "FRAME" }   = $frame;
+
+                    &put_record( \%new_record, $out );
+                }
+            }
+        }
+        else
+        {
+            &put_record( $record, $out );
+        }
+    }
+}
+
+
+sub script_extract_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Extract subsequences from sequences in record.
+    # "beg" and "end" are 1-based, inclusive positions; "len" is used only
+    # when "end" is not given. Without "end" and "len" the sequence from
+    # "beg" to its end is kept. A missing or non-positive "beg" means the
+    # first residue, and an "end" before "beg" yields an empty sequence.
+    # All records are passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $beg, $end, $span, $record );
+
+    # A "beg" of 0 must not become offset -1, which substr would count
+    # from the end of the sequence.
+    if ( not defined $options->{ "beg" } or $options->{ "beg" } < 1 ) {
+        $beg = 0;
+    } else {
+        $beg = $options->{ "beg" } - 1;   # correcting for start offset
+    }
+
+    if ( defined $options->{ "end" } and $options->{ "end" } - 1 < $beg ) {
+        $end = $beg - 1;
+    } elsif ( defined $options->{ "end" } ) {
+        $end = $options->{ "end" } - 1;   # correcting for start offset
+    }
+
+    if ( defined $end ) {
+        $span = $end - $beg + 1;
+    } elsif ( defined $options->{ "len" } ) {
+        $span = $options->{ "len" };
+    }
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            # substr stops at the end of the string when the requested
+            # span is longer than what is available.
+            if ( defined $span ) {
+                $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg, $span;
+            } else {
+                $record->{ "SEQ" } = substr $record->{ "SEQ" }, $beg;
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_get_genome_seq
+{
+    # Martin A. Hansen, December 2007.
+
+    # Gets a subsequence from a genome.
+    # With the "genome", "chr", "beg" and "end"/"len" options one record
+    # holding the requested interval is emitted first. After that, every
+    # stream record without SEQ whose REC_TYPE is BED, PSL or BLAST and
+    # whose chromosome is in the genome index gets its SEQ read from the
+    # genome file. Intervals are widened by "flank" on both sides and
+    # clipped to the indexed sequence. Minus strand records are reverse
+    # complemented, and with the "mask" option multi block hits get
+    # uppercase blocks on a lowercase background.
+    # Stream records starting beyond the end of the sequence are dropped.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $genome_file, $index_file, $index, $fh, $index_beg, $index_len, $beg, $len, %lookup_hash, @begs, @lens, $i, $type );
+
+    $options->{ "flank" } ||= 0;
+
+    if ( $options->{ "genome" } )
+    {
+        $genome_file = &Maasha::Config::genome_fasta( $options->{ 'genome' } );
+        $index_file  = &Maasha::Config::genome_fasta_index( $options->{ 'genome' } );
+
+        $fh    = &Maasha::Common::read_open( $genome_file );
+        $index = &Maasha::Fasta::index_retrieve( $index_file );
+
+        shift @{ $index }; # Get rid of the file size info
+
+        # name => [ offset of the sequence in the file, sequence length ]
+        map { $lookup_hash{ $_->[ 0 ] } = [ $_->[ 1 ], $_->[ 2 ] ] } @{ $index };
+
+        if ( exists $lookup_hash{ $options->{ "chr" } } and defined $options->{ "beg" } and ( defined $options->{ "end" } or defined $options->{ "len" } ) )
+        {
+            ( $index_beg, $index_len ) = @{ $lookup_hash{ $options->{ "chr" } } };
+
+            $beg = $index_beg + $options->{ "beg" } - 1;
+
+            if ( $options->{ "len" } ) {
+                $len = $options->{ "len" };
+            } elsif ( $options->{ "end" } ) {
+                $len = ( $options->{ "end" } - $options->{ "beg" } + 1 );
+            }
+
+            $beg -= $options->{ "flank" };
+            $len += 2 * $options->{ "flank" };
+
+            if ( $beg <= $index_beg )
+            {
+                $len -= $index_beg - $beg;
+                $beg = $index_beg;
+            }
+
+            $len = $index_beg + $index_len - $beg if $beg + $len > $index_beg + $index_len;
+
+            # Skip requests starting beyond the sequence. This is not
+            # inside a loop, so a plain conditional is used.
+            if ( $beg <= $index_beg + $index_len )
+            {
+                $record->{ "CHR" }     = $options->{ "chr" };
+                $record->{ "CHR_BEG" } = $beg - $index_beg;
+                $record->{ "CHR_END" } = $record->{ "CHR_BEG" } + $len - 1;
+
+                $record->{ "SEQ" }     = &Maasha::Common::file_read( $fh, $beg, $len );
+                $record->{ "SEQ_LEN" } = $len;
+
+                &put_record( $record, $out );
+            }
+        }
+    }
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $options->{ "genome" } and not $record->{ "SEQ" } )
+        {
+            $type = $record->{ "REC_TYPE" } || "";
+
+            if ( $type eq "BED" and exists $lookup_hash{ $record->{ "CHR" } } )
+            {
+                ( $index_beg, $index_len ) = @{ $lookup_hash{ $record->{ "CHR" } } };
+
+                $beg = $record->{ "CHR_BEG" } + $index_beg;
+                $len = $record->{ "CHR_END" } - $record->{ "CHR_BEG" } + 1;
+            }
+            elsif ( ( $type eq "PSL" or $type eq "BLAST" ) and exists $lookup_hash{ $record->{ "S_ID" } } )
+            {
+                ( $index_beg, $index_len ) = @{ $lookup_hash{ $record->{ "S_ID" } } };
+
+                $beg = $record->{ "S_BEG" } + $index_beg;
+                $len = $record->{ "S_END" } - $record->{ "S_BEG" } + 1;
+            }
+            else
+            {
+                # No usable coordinates: pass the record on unchanged
+                # rather than reuse those of a previous record.
+                &put_record( $record, $out );
+
+                next;
+            }
+
+            $beg -= $options->{ "flank" };
+            $len += 2 * $options->{ "flank" };
+
+            if ( $beg <= $index_beg )
+            {
+                $len -= $index_beg - $beg;
+                $beg = $index_beg;
+            }
+
+            $len = $index_beg + $index_len - $beg if $beg + $len > $index_beg + $index_len;
+
+            next if $beg > $index_beg + $index_len;
+
+            $record->{ "CHR_BEG" } = $beg - $index_beg;
+            $record->{ "CHR_END" } = $record->{ "CHR_BEG" } + $len - 1;
+
+            $record->{ "SEQ" } = &Maasha::Common::file_read( $fh, $beg, $len );
+
+            if ( $record->{ "STRAND" } and $record->{ "STRAND" } eq "-" )
+            {
+                &Maasha::Seq::dna_comp( \$record->{ "SEQ" } );
+                $record->{ "SEQ" } = reverse $record->{ "SEQ" };
+            }
+
+            # uppercase hit block segments and lowercase the rest.
+            if ( $options->{ "mask" } and $record->{ "BLOCKCOUNT" } and $record->{ "BLOCKCOUNT" } > 1 )
+            {
+                $record->{ "SEQ" } = lc $record->{ "SEQ" };
+
+                @begs = split ",", $record->{ "Q_BEGS" };
+                @lens = split ",", $record->{ "BLOCKSIZES" };
+
+                for ( $i = 0; $i < @begs; $i++ ) {
+                    substr $record->{ "SEQ" }, $begs[ $i ], $lens[ $i ], uc substr $record->{ "SEQ" }, $begs[ $i ], $lens[ $i ];
+                }
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+
+    close $fh if $fh;
+}
+
+
+sub script_get_genome_align
+{
+    # Martin A. Hansen, April 2008.
+
+    # Gets a subalignment from a multiple genome alignment.
+    # With the "chr", "beg" and "end"/"len" options the alignment for that
+    # interval is emitted first. Then, for every stream record of REC_TYPE
+    # BED, PSL or BLAST, the alignment covering its interval is extracted
+    # with Maasha::UCSC::maf_extract. Each alignment entry is emitted as a
+    # record tagged with a running ALIGN number and the coordinates of the
+    # interval it was extracted for. The stream records themselves are
+    # not passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $maf_track, $align, $align_num, $beg, $end, $entry, $type );
+
+    $options->{ "strand" } ||= "+";
+
+    $align_num = 1;
+
+    $maf_track = &Maasha::Config::maf_track( $options->{ "genome" } );
+
+    if ( $options->{ "chr" } and $options->{ "beg" } and ( $options->{ "end" } or $options->{ "len" } ) )
+    {
+        $beg = $options->{ "beg" } - 1;
+
+        if ( $options->{ "end" } ) {
+            $end = $options->{ "end" };
+        } elsif ( $options->{ "len" } ) {
+            $end = $beg + $options->{ "len" };
+        }
+
+        $align = &Maasha::UCSC::maf_extract( $TMP_DIR, $options->{ "genome" }, $maf_track, $options->{ "chr" }, $beg, $end, $options->{ "strand" } );
+
+        foreach $entry ( @{ $align } )
+        {
+            # No stream record has been read yet, so the entries are
+            # described by the interval that was handed to maf_extract.
+            $entry->{ "ALIGN" }   = $align_num;
+            $entry->{ "CHR" }     = $options->{ "chr" };
+            $entry->{ "CHR_BEG" } = $beg;
+            $entry->{ "CHR_END" } = $end;
+            $entry->{ "STRAND" }  = $options->{ "strand" };
+
+            &put_record( $entry, $out );
+        }
+
+        # Keep alignment numbers from the stream distinct from this one.
+        $align_num++;
+    }
+
+    while ( $record = &get_record( $in ) )
+    {
+        # Start empty so that a record of another type does not re-emit
+        # the alignment of the previous record.
+        $align = [];
+
+        $type = $record->{ "REC_TYPE" } || "";
+
+        if ( $type eq "BED" )
+        {
+            $align = &Maasha::UCSC::maf_extract( $TMP_DIR, $options->{ "genome" }, $maf_track, $record->{ "CHR" }, $record->{ "CHR_BEG" }, $record->{ "CHR_END" }, $record->{ "STRAND" } );
+        }
+        elsif ( $type eq "PSL" or $type eq "BLAST" )
+        {
+            $align = &Maasha::UCSC::maf_extract( $TMP_DIR, $options->{ "genome" }, $maf_track, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" }, $record->{ "STRAND" } );
+        }
+
+        foreach $entry ( @{ $align } )
+        {
+            $entry->{ "ALIGN" }   = $align_num;
+            $entry->{ "CHR" }     = $record->{ "CHR" };
+            $entry->{ "CHR_BEG" } = $record->{ "CHR_BEG" };
+            $entry->{ "CHR_END" } = $record->{ "CHR_END" };
+            $entry->{ "STRAND" }  = $record->{ "STRAND" };
+            $entry->{ "Q_ID" }    = $record->{ "Q_ID" };
+            $entry->{ "SCORE" }   = $record->{ "SCORE" };
+
+            &put_record( $entry, $out );
+        }
+
+        $align_num++;
+    }
+}
+
+
+sub script_get_genome_phastcons
+{
+    # Martin A. Hansen, February 2008.
+
+    # Get phastcons scores from genome intervals.
+    # With the "chr", "beg" and "end"/"len" options (1-based) a record with
+    # the scores of that interval is emitted first. Then every stream
+    # record of REC_TYPE BED, PSL or BLAST gets a PHASTCONS key holding
+    # the comma separated scores of its interval, if any were found.
+    # Intervals are extended by "flank" in the lookup. All stream records
+    # are passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $phastcons_file, $phastcons_index, $index, $fh_phastcons, $scores, $record, $type );
+
+    $options->{ "flank" } ||= 0;
+
+    $phastcons_file  = &Maasha::Config::genome_phastcons( $options->{ "genome" } );
+    $phastcons_index = &Maasha::Config::genome_phastcons_index( $options->{ "genome" } );
+
+    $index        = &Maasha::UCSC::phastcons_index_retrieve( $phastcons_index );
+    $fh_phastcons = &Maasha::Common::read_open( $phastcons_file );
+
+    if ( defined $options->{ "chr" } and defined $options->{ "beg" } and ( defined $options->{ "end" } or defined $options->{ "len" } ) )
+    {
+        $options->{ "beg" } -= 1;                                 # request is 1-based
+        $options->{ "end" } -= 1 if defined $options->{ "end" };  # request is 1-based
+
+        if ( $options->{ "len" } ) {
+            $options->{ "end" } = $options->{ "beg" } + $options->{ "len" } - 1;
+        }
+
+        $scores = &Maasha::UCSC::phastcons_index_lookup( $index, $fh_phastcons, $options->{ "chr" }, $options->{ "beg" }, $options->{ "end" }, $options->{ "flank" } );
+
+        $record->{ "CHR" }     = $options->{ "chr" };
+        $record->{ "CHR_BEG" } = $options->{ "beg" } - $options->{ "flank" };
+        $record->{ "CHR_END" } = $options->{ "end" } + $options->{ "flank" };
+
+        $record->{ "PHASTCONS" }   = join ",", @{ $scores };
+        $record->{ "PHAST_COUNT" } = scalar @{ $scores };  # DEBUG
+
+        &put_record( $record, $out );
+    }
+
+    while ( $record = &get_record( $in ) )
+    {
+        # Forget the previous lookup so a record of another type neither
+        # inherits its scores nor dereferences an undefined value.
+        undef $scores;
+
+        $type = $record->{ "REC_TYPE" } || "";
+
+        if ( $type eq "BED" )
+        {
+            $scores = &Maasha::UCSC::phastcons_index_lookup( $index, $fh_phastcons, $record->{ "CHR" }, $record->{ "CHR_BEG" }, $record->{ "CHR_END" }, $options->{ "flank" } );
+        }
+        elsif ( $type eq "PSL" or $type eq "BLAST" )
+        {
+            $scores = &Maasha::UCSC::phastcons_index_lookup( $index, $fh_phastcons, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" }, $options->{ "flank" } );
+        }
+
+        $record->{ "PHASTCONS" } = join ",", @{ $scores } if $scores and @{ $scores };
+
+        &put_record( $record, $out );
+    }
+
+    close $fh_phastcons if $fh_phastcons;
+}
+
+
+sub script_fold_seq
+{
+    # Martin A. Hansen, December 2007.
+
+    # Fold the sequences in stream with Maasha::Seq::fold_struct_rnafold.
+    # The sequence type is guessed once, from the first sequence seen;
+    # unless it is "protein", each sequence record gets SEC_STRUCT,
+    # FREE_ENERGY, SCORE (absolute integer part of the energy), SIZE
+    # (length of the structure string) and CONF ("1," repeated SIZE times).
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    my $type;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } )
+        {
+            $type ||= &Maasha::Seq::seq_guess_type( $record->{ "SEQ" } );
+
+            unless ( $type eq "protein" )
+            {
+                my ( $struct, $energy ) = &Maasha::Seq::fold_struct_rnafold( $record->{ "SEQ" } );
+
+                $record->{ "SEC_STRUCT" }  = $struct;
+                $record->{ "FREE_ENERGY" } = $energy;
+                $record->{ "SCORE" }       = abs int $energy;
+                $record->{ "SIZE" }        = length $struct;
+                $record->{ "CONF" }        = "1," x $record->{ "SIZE" };
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_split_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Split a sequence in stream into words.
+    # Every record with SEQ_NAME and SEQ is replaced by one SPLIT record
+    # per overlapping word of "word_size" residues (default 7), named
+    # "<SEQ_NAME>[<beg>-<end>]" with 1-based positions. With the "uniq"
+    # option each distinct word is emitted only the first time it is seen
+    # in the whole stream. Other records are passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $new_record, $i, $subseq, %lookup );
+
+    $options->{ "word_size" } ||= 7;
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        {
+            for ( $i = 0; $i < length( $record->{ "SEQ" } ) - $options->{ "word_size" } + 1; $i++ )
+            {
+                $subseq = substr $record->{ "SEQ" }, $i, $options->{ "word_size" };
+
+                if ( $options->{ "uniq" } )
+                {
+                    # Skip words already emitted. Without this the option
+                    # would have no effect.
+                    next if $lookup{ $subseq };
+
+                    $lookup{ $subseq } = 1;
+                }
+
+                $new_record->{ "REC_TYPE" } = "SPLIT";
+                $new_record->{ "SEQ_NAME" } = $record->{ "SEQ_NAME" } . "[" . ( $i + 1 ) . "-" . ( $i + $options->{ "word_size" } ) . "]";
+                $new_record->{ "SEQ" }      = $subseq;
+
+                &put_record( $new_record, $out );
+            }
+        }
+        else
+        {
+            &put_record( $record, $out );
+        }
+    }
+}
+
+
+sub script_split_bed
+{
+    # Martin A. Hansen, June 2008.
+
+    # Split a BED record into overlapping windows.
+    # Every record with CHR, CHR_BEG and CHR_END is replaced by BED
+    # records of "window_size" positions (default 20), starting every
+    # "step_size" positions (default 1). Coordinates follow the inclusive
+    # convention used for BED_LEN here (CHR_END - CHR_BEG + 1), so each
+    # window ends at CHR_BEG + window_size - 1 and the last window ends on
+    # the CHR_END of the original record. Other records are passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $new_record, $i );
+
+    $options->{ "window_size" } ||= 20;
+    $options->{ "step_size" }   ||= 1;
+
+    while ( $record = &get_record( $in ) )
+    {
+        # Test definedness, not truth: a CHR_BEG of 0 is a valid position.
+        if ( $record->{ "CHR" } and defined $record->{ "CHR_BEG" } and defined $record->{ "CHR_END" } )
+        {
+            $record->{ "BED_LEN" } = $record->{ "CHR_END" } - $record->{ "CHR_BEG" } + 1;
+
+            # <= so that a record exactly one window long yields that window.
+            for ( $i = 0; $i <= $record->{ "BED_LEN" } - $options->{ "window_size" }; $i += $options->{ "step_size" } )
+            {
+                $new_record->{ "REC_TYPE" } = "BED";
+                $new_record->{ "CHR" }      = $record->{ "CHR" };
+                $new_record->{ "CHR_BEG" }  = $record->{ "CHR_BEG" } + $i;
+                $new_record->{ "CHR_END" }  = $record->{ "CHR_BEG" } + $i + $options->{ "window_size" } - 1;
+                $new_record->{ "BED_LEN" }  = $options->{ "window_size" };
+                $new_record->{ "Q_ID" }     = $record->{ "Q_ID" } . "_$i";
+                $new_record->{ "SCORE" }    = $record->{ "SCORE" };
+                $new_record->{ "STRAND" }   = $record->{ "STRAND" };
+
+                &put_record( $new_record, $out );
+            }
+        }
+        else
+        {
+            &put_record( $record, $out );
+        }
+    }
+}
+
+
+sub script_align_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Align the sequences in stream with Maasha::Align::align.
+    # Records with SEQ and either SEQ_NAME or Q_ID are collected as
+    # [ name, sequence ] entries; all other records are passed on at once.
+    # After the stream is exhausted the aligned entries are emitted as
+    # records with ALIGN => 1, SEQ_NAME and SEQ.
+
+    my ( $in, $out ) = @_;
+
+    # Returns nothing.
+
+    my @entries;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        my $name;
+
+        if ( $record->{ "SEQ" } ) {
+            $name = $record->{ "SEQ_NAME" } || $record->{ "Q_ID" };
+        }
+
+        if ( $name ) {
+            push @entries, [ $name, $record->{ "SEQ" } ];
+        } else {
+            &put_record( $record, $out );
+        }
+    }
+
+    @entries = &Maasha::Align::align( \@entries );
+
+    for my $entry ( @entries )
+    {
+        next unless $entry->[ SEQ_NAME ] and $entry->[ SEQ ];
+
+        my $aligned = {
+            ALIGN    => 1,
+            SEQ_NAME => $entry->[ SEQ_NAME ],
+            SEQ      => $entry->[ SEQ ],
+        };
+
+        &put_record( $aligned, $out );
+    }
+}
+
+
+sub script_tile_seq
+{
+    # Martin A. Hansen, February 2008.
+
+    # Tile sequences against a reference with Maasha::Align::align_tile.
+    # The first record with SEQ_NAME and SEQ becomes the reference and all
+    # later such records are the sequences to tile; records lacking either
+    # key are passed on at once. The tiled entries are emitted at the end
+    # as records with SEQ_NAME, SEQ and ALIGN => 1.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my ( $ref_entry, @entries );
+
+    while ( my $record = &get_record( $in ) )
+    {
+        unless ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        {
+            &put_record( $record, $out );
+
+            next;
+        }
+
+        my $entry = [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ];
+
+        # The reference is still unset only for the first sequence record.
+        if ( defined $ref_entry ) {
+            push @entries, $entry;
+        } else {
+            $ref_entry = $entry;
+        }
+    }
+
+    @entries = &Maasha::Align::align_tile( $ref_entry, \@entries, $options );
+
+    foreach my $tiled ( @entries ) {
+        &put_record( { SEQ_NAME => $tiled->[ SEQ_NAME ], SEQ => $tiled->[ SEQ ], ALIGN => 1 }, $out );
+    }
+}
+
+
+sub script_invert_align
+{
+    # Martin A. Hansen, February 2008.
+
+    # Invert an alignment with Maasha::Align::align_invert (per the
+    # original author: show only non-matching residues, using the first
+    # sequence as reference). Records with SEQ_NAME, SEQ and ALIGN are
+    # collected; all other records are passed on at once. The "soft"
+    # option is handed to align_invert, and the collected entries are then
+    # emitted as records with SEQ_NAME, SEQ and ALIGN => 1.
+
+    my ( $in, $out, $options ) = @_;
+
+    # Returns nothing.
+
+    my @entries;
+
+    while ( my $record = &get_record( $in ) )
+    {
+        my $is_aligned = ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } and $record->{ "ALIGN" } );
+
+        if ( $is_aligned ) {
+            push @entries, [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ];
+        } else {
+            &put_record( $record, $out );
+        }
+    }
+
+    # The return value is ignored; the entries are emitted from @entries.
+    &Maasha::Align::align_invert( \@entries, $options->{ "soft" } );
+
+    foreach my $entry ( @entries ) {
+        &put_record( { SEQ_NAME => $entry->[ SEQ_NAME ], SEQ => $entry->[ SEQ ], ALIGN => 1 }, $out );
+    }
+}
+
+
+sub script_patscan_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Locates patterns in sequences using scan_for_matches.
+    # Patterns come from the "patterns" option or the "patterns_in" file.
+    # Sequences in the stream are written to a temporary FASTA file under
+    # numeric ids; with the "genome" option the genome FASTA file is
+    # scanned instead. One result record is emitted per match. For genome
+    # scans S_ID/S_BEG/S_END are renamed to CHR/CHR_BEG/CHR_END; otherwise
+    # S_ID is mapped back to the original SEQ_NAME.
+    # Incoming records are consumed and not passed on.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $genome_file, @args, $arg, $type, $seq_file, $pat_file, $out_file, $fh_in, $fh_out, $record, $patterns, $pattern, $entry, $result, %head_hash, $i );
+
+    if ( $options->{ "patterns" } ) {
+        $patterns = &Maasha::Patscan::parse_patterns( $options->{ "patterns" } );
+    } elsif ( defined $options->{ "patterns_in" } and -f $options->{ "patterns_in" } ) {
+        $patterns = &Maasha::Patscan::read_patterns( $options->{ "patterns_in" } );
+    }
+
+    $genome_file = &Maasha::Config::genome_fasta( $options->{ 'genome' } ) if $options->{ 'genome' };
+
+    push @args, "-c"                            if $options->{ "comp" };
+    push @args, "-m $options->{ 'max_hits' }"   if $options->{ 'max_hits' };
+    push @args, "-n $options->{ 'max_misses' }" if $options->{ 'max_misses' };   # was gated on max_hits
+
+    # NOTE(review): fixed file names - concurrent runs sharing $TMP_DIR
+    # would overwrite each other's files.
+    $seq_file = "$TMP_DIR/patscan.seq";
+    $pat_file = "$TMP_DIR/patscan.pat";
+    $out_file = "$TMP_DIR/patscan.out";
+
+    $fh_out = &Maasha::Common::write_open( $seq_file );
+
+    $i = 0;
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ" } and $record->{ "SEQ_NAME" } )
+        {
+            $type = &Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $type;
+
+            # Sequences are written under a running number; %head_hash
+            # maps the number back to the real name for the results.
+            &Maasha::Fasta::put_entry( [ $i, $record->{ "SEQ" } ], $fh_out );
+
+            $head_hash{ $i } = $record->{ "SEQ_NAME" };
+
+            $i++;
+        }
+
+#        &put_record( $record, $out );
+    }
+
+    close $fh_out;
+
+    $arg  = join " ", @args;
+    $arg .= " -p" if $type and $type eq "protein";
+
+    foreach $pattern ( @{ $patterns } )
+    {
+        $fh_out = &Maasha::Common::write_open( $pat_file );
+
+        print $fh_out "$pattern\n";
+
+        close $fh_out;
+
+        # SECURITY: option values are interpolated into a shell command -
+        # only use with trusted command line input.
+        if ( $options->{ 'genome' } ) {
+            `scan_for_matches $arg $pat_file < $genome_file > $out_file`;
+            # &Maasha::Common::run( "scan_for_matches", "$arg $pat_file < $genome_file > $out_file" );
+        } else {
+            `scan_for_matches $arg $pat_file < $seq_file > $out_file`;
+            # &Maasha::Common::run( "scan_for_matches", "$arg $pat_file < $seq_file > $out_file" );
+        }
+
+        $fh_in = &Maasha::Common::read_open( $out_file );
+
+        while ( $entry = &Maasha::Fasta::get_entry( $fh_in ) )
+        {
+            $result = &Maasha::Patscan::parse_scan_result( $entry, $pattern );
+
+            if ( $options->{ 'genome' } )
+            {
+                $result->{ "CHR" }     = $result->{ "S_ID" };
+                $result->{ "CHR_BEG" } = $result->{ "S_BEG" };
+                $result->{ "CHR_END" } = $result->{ "S_END" };
+
+                delete $result->{ "S_ID" };
+                delete $result->{ "S_BEG" };
+                delete $result->{ "S_END" };
+            }
+            else
+            {
+                $result->{ "S_ID" } = $head_hash{ $result->{ "S_ID" } };
+            }
+
+            &put_record( $result, $out );
+        }
+
+        close $fh_in;
+    }
+
+    unlink $pat_file;
+    unlink $seq_file;
+    unlink $out_file;
+}
+
+
+sub script_create_blast_db
+{
+    # Martin A. Hansen, September 2007.
+
+    # Creates a NCBI BLAST database with formatdb.
+    #
+    # Sequence records from the stream are written as FASTA to the path
+    # given by the "database" option, formatdb is run on that file (as a
+    # protein database if the first sequence is guessed to be protein), and
+    # the FASTA file is removed afterwards. Records are passed through
+    # unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $db_path  = $options->{ "database" };
+    my $fasta_fh = &Maasha::Common::write_open( $db_path );
+    my $guessed_type;
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+
+        next unless $rec->{ "SEQ" } and $rec->{ "SEQ_NAME" };
+
+        # Only the first sequence seen decides the database type.
+        $guessed_type ||= &Maasha::Seq::seq_guess_type( $rec->{ "SEQ" } );
+
+        &Maasha::Fasta::put_entry( [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ], $fasta_fh );
+    }
+
+    close $fasta_fh;
+
+    my $protein_flag = $guessed_type eq "protein" ? "T" : "F";
+
+    &Maasha::Common::run( "formatdb", "-p $protein_flag -i $db_path -t $options->{ 'database' }" );
+
+    unlink $db_path;
+}
+
+
+sub script_blast_seq
+{
+    # Martin A. Hansen, September 2007.
+
+    # BLASTs sequences in stream against a given database.
+    #
+    # Sequences from the stream are written to a temporary FASTA file and
+    # searched with blastall against the database given by the "database"
+    # option (or the BLAST database of the "genome" option). Unless the
+    # "program" option is set, the BLAST program is chosen from the guessed
+    # query type and the database type (a .phr file marks a protein
+    # database). All input records are passed through, followed by one
+    # BLAST record per hit with 0-based coordinates.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $q_type, $s_type, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $line, @fields );
+
+    $options->{ "e_val" }  = 10 if not defined $options->{ "e_val" };
+    $options->{ "filter" } = $options->{ "filter" } ? "T" : "F";   # previously always ended up as "T"
+    $options->{ "cpus" } ||= 1;
+
+    $options->{ "database" } = &Maasha::Config::genome_blast( $options->{ 'genome' } ) if $options->{ 'genome' };
+
+    $tmp_in  = "$TMP_DIR/blast_query.seq";
+    $tmp_out = "$TMP_DIR/blast.result";
+
+    $fh_out = &Maasha::Common::write_open( $tmp_in );
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        {
+            $q_type = &Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ) if not $q_type;
+
+            &Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out );
+        }
+
+        &put_record( $record, $out );
+    }
+
+    close $fh_out;
+
+    $q_type = "" if not defined $q_type;   # no sequences in stream
+
+    if ( -f $options->{ 'database' } . ".phr" ) {
+        $s_type = "protein";
+    } else {
+        $s_type = "nucleotide";
+    }
+
+    if ( not $options->{ 'program' } )
+    {
+        if ( $q_type ne "protein" and $s_type ne "protein" ) {
+            $options->{ 'program' } = "blastn";
+        } elsif ( $q_type eq "protein" and $s_type eq "protein" ) {
+            $options->{ 'program' } = "blastp";
+        } elsif ( $q_type ne "protein" and $s_type eq "protein" ) {
+            $options->{ 'program' } = "blastx";
+        } elsif ( $q_type eq "protein" and $s_type ne "protein" ) {
+            $options->{ 'program' } = "tblastn";
+        }
+    }
+
+    &Maasha::Common::run( "blastall", "-p $options->{ 'program' } -e $options->{ 'e_val' } -a $options->{ 'cpus' } -m 8 -i $tmp_in -d $options->{ 'database' } -F $options->{ 'filter' } -o $tmp_out > /dev/null 2>&1", 1 );
+
+    unlink $tmp_in;
+
+    $fh_in = &Maasha::Common::read_open( $tmp_out );
+
+    while ( $line = <$fh_in> )
+    {
+        chomp $line;
+
+        next if $line =~ /^#/;
+        next if $line !~ /\S/;
+
+        @fields = split /\s+/, $line;
+
+        # A fresh hash per hit so no state is shared between emitted records.
+        $record = {
+            "REC_TYPE"   => "BLAST",
+            "Q_ID"       => $fields[ 0 ],
+            "S_ID"       => $fields[ 1 ],
+            "IDENT"      => $fields[ 2 ],
+            "ALIGN_LEN"  => $fields[ 3 ],
+            "MISMATCHES" => $fields[ 4 ],
+            "GAPS"       => $fields[ 5 ],
+            "Q_BEG"      => $fields[ 6 ] - 1,    # BLAST is 1-based
+            "Q_END"      => $fields[ 7 ] - 1,    # BLAST is 1-based
+            "S_BEG"      => $fields[ 8 ] - 1,    # BLAST is 1-based
+            "S_END"      => $fields[ 9 ] - 1,    # BLAST is 1-based
+            "E_VAL"      => $fields[ 10 ],
+            "BIT_SCORE"  => $fields[ 11 ],
+        };
+
+        # Minus strand hits are reported with begin > end; normalise them.
+        if ( $record->{ "S_BEG" } > $record->{ "S_END" } )
+        {
+            $record->{ "STRAND" } = '-';
+
+            ( $record->{ "S_BEG" }, $record->{ "S_END" } ) = ( $record->{ "S_END" }, $record->{ "S_BEG" } );
+        }
+        else
+        {
+            $record->{ "STRAND" } = '+';
+        }
+
+        &put_record( $record, $out );
+    }
+
+    close $fh_in;
+
+    unlink $tmp_out;
+}
+
+
+sub script_blat_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # BLATs sequences in stream against a given genome.
+    #
+    # Sequences from the stream are written to a temporary FASTA file and
+    # searched with blat against the genome FASTA file of the "genome"
+    # option. All input records are passed through, followed by the
+    # entries parsed from the resulting PSL file.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $genome_fasta = &Maasha::Config::genome_fasta( $options->{ "genome" } );
+
+    # Defaults for the blat search parameters.
+    $options->{ 'tile_size' }    ||= 11;
+    $options->{ 'one_off' }      ||= 0;
+    $options->{ 'min_identity' } ||= 90;
+    $options->{ 'min_score' }    ||= 0;
+    $options->{ 'step_size' }    ||= $options->{ 'tile_size' };
+
+    my $switches = join "",
+        " -tileSize=$options->{ 'tile_size' }",
+        " -oneOff=$options->{ 'one_off' }",
+        " -minIdentity=$options->{ 'min_identity' }",
+        " -minScore=$options->{ 'min_score' }",
+        " -stepSize=$options->{ 'step_size' }";
+
+    if ( $options->{ 'ooc' } ) {
+        $switches .= " -ooc=" . &Maasha::Config::genome_blat_ooc( $options->{ "genome" }, 11 );
+    }
+
+    my $query_path = "$TMP_DIR/blat.seq";
+    my $query_fh   = &Maasha::Common::write_open( $query_path );
+    my $query_type;
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        if ( $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" } )
+        {
+            &Maasha::Fasta::put_entry( [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ], $query_fh, 80 );
+
+            # The first sequence decides the query type.
+            $query_type ||= &Maasha::Seq::seq_guess_type( $rec->{ "SEQ" } );
+        }
+
+        &put_record( $rec, $out );
+    }
+
+    close $query_fh;
+
+    # Protein queries are searched against the translated genome.
+    $switches .= " -t=dnax" if $query_type eq "protein";
+    $switches .= " -q=$query_type";
+
+    my $psl_path = "$TMP_DIR/blat.psl";
+
+    &Maasha::Common::run( "blat", "$genome_fasta $query_path $switches $psl_path > /dev/null 2>&1" );
+
+    unlink $query_path;
+
+    my $hits = &Maasha::UCSC::psl_get_entries( $psl_path );
+
+    foreach my $hit ( @{ $hits } ) {
+        &put_record( $hit, $out );
+    }
+
+    unlink $psl_path;
+}
+
+
+sub script_match_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Matches sequences in stream using Maasha::Match::match_mummer.
+    #
+    # All records are passed through. If the stream held exactly one
+    # sequence it is matched against itself; if it held exactly two, the
+    # first is matched against the second. With any other number of
+    # sequences no matching is done. Match results are emitted after the
+    # passed-through records.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my @seqs;
+
+    $options->{ "word_size" } ||= 20;
+    $options->{ "direction" } ||= "both";
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        push @seqs, [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ] if $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" };
+
+        &put_record( $rec, $out );
+    }
+
+    # Pick the subject sequence: the only one, or the second of two.
+    my $subject = @seqs == 1 ? $seqs[ 0 ]
+                : @seqs == 2 ? $seqs[ 1 ]
+                :              undef;
+
+    if ( $subject )
+    {
+        my $matches = &Maasha::Match::match_mummer( [ $seqs[ 0 ] ], [ $subject ], $options, $TMP_DIR );
+
+        foreach my $match ( @{ $matches } ) {
+            &put_record( $match, $out );
+        }
+    }
+}
+
+
+sub script_create_vmatch_index
+{
+    # Martin A. Hansen, January 2008.
+
+    # Create a vmatch index from sequences in the stream.
+    #
+    # When the "index_name" option is given, sequences are written as FASTA
+    # to a file of that name, mkvtree is run on it (protein or DNA mode
+    # depending on the guessed type of the first sequence) and the FASTA
+    # file is removed. Records are passed through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $index_path, $index_fh, $seq_type );
+
+    if ( $options->{ "index_name" } )
+    {
+        $index_path = $options->{ 'index_name' };
+        $index_fh   = &Maasha::Common::write_open( $index_path );
+    }
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        if ( $options->{ "index_name" } and $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" } )
+        {
+            &Maasha::Fasta::put_entry( [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ], $index_fh );
+
+            $seq_type ||= &Maasha::Seq::seq_guess_type( $rec->{ "SEQ" } );
+        }
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    if ( $options->{ "index_name" } )
+    {
+        close $index_fh;
+
+        my $alphabet = $seq_type eq "protein" ? "-protein" : "-dna";
+
+        &Maasha::Common::run( "mkvtree", "-db $index_path $alphabet -pl $options->{ 'prefix_length' } -allout -indexname $index_path > /dev/null 2>&1" );
+
+        # Only the index files are kept, not the FASTA input.
+        unlink $index_path;
+    }
+}
+
+
+sub script_vmatch_seq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Vmatches sequences in stream against a given genome.
+    #
+    # The index to search is the one named by the "index_name" option or,
+    # failing that, the vmatch index files of the "genome" option. All
+    # records are passed through and also collected for the search; the
+    # entries read from the vmatch result file are emitted afterwards.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    $options->{ 'count' } = 1 if $options->{ 'max_hits' };
+
+    my @indexes = $options->{ "index_name" }
+                ? ( $options->{ "index_name" } )
+                : &Maasha::Config::genome_vmatch( $options->{ "genome" } );
+
+    my @queries;
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        push @queries, $rec;
+
+        &put_record( $rec, $out );
+    }
+
+    my $hits_path = &Maasha::Match::match_vmatch( $TMP_DIR, \@queries, \@indexes, $options );
+
+    # The collected records are no longer needed; release them.
+    undef @queries;
+
+    my $hits_fh = &Maasha::Common::read_open( $hits_path );
+
+    while ( my $hit = &Maasha::Match::vmatch_get_entry( $hits_fh ) ) {
+        &put_record( $hit, $out );
+    }
+
+    close $hits_fh;
+
+    unlink $hits_path;
+}
+
+
+sub script_write_fasta
+{
+    # Martin A. Hansen, August 2007.
+
+    # Write FASTA entries from sequences in stream.
+    #
+    # Output goes to the stream opened from the "data_out" and "compress"
+    # options; lines are wrapped according to the "wrap" option. Records
+    # are passed through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $fasta_fh = &write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        my $is_seq = ( $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" } );
+
+        &Maasha::Fasta::put_entry( [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ], $fasta_fh, $options->{ "wrap" } ) if $is_seq;
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    close $fasta_fh;
+}
+
+
+sub script_write_align
+{
+    # Martin A. Hansen, August 2007.
+
+    # Write pretty alignments from aligned sequences in stream.
+    #
+    # Consecutive records with SEQ_NAME, SEQ and the same ALIGN value form
+    # one alignment. An alignment of two sequences is printed pairwise,
+    # one of more than two as a multiple alignment; a lone sequence is not
+    # printed. Records are passed through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $fh, $record, $align, $old_align, @entries, $print_entries );
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    # Prints the entries collected so far as a single alignment.
+    $print_entries = sub
+    {
+        if ( scalar( @entries ) == 2 ) {
+            &Maasha::Align::align_print_pairwise( $entries[ 0 ], $entries[ 1 ], $fh, $options->{ "wrap" } );
+        } elsif ( scalar( @entries ) > 2 ) {
+            &Maasha::Align::align_print_multi( \@entries, $fh, $options->{ "wrap" }, $options->{ "no_ruler" }, $options->{ "no_consensus" } );
+        }
+    };
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "ALIGN" } and $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        {
+            $align = $record->{ "ALIGN" };
+
+            # A new ALIGN value closes the current alignment.
+            if ( $old_align and $align != $old_align )
+            {
+                $print_entries->();
+
+                @entries = ();
+            }
+
+            # Always keep the current record: previously the record that
+            # opened a new alignment was dropped.
+            push @entries, [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ];
+
+            $old_align = $align;
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    # Print the last alignment in the stream.
+    $print_entries->();
+
+    close $fh if $fh;
+}
+
+
+sub script_write_blast
+{
+    # Martin A. Hansen, November 2007.
+
+    # Write data in blast table format (-m8 and 9).
+    #
+    # Every record with REC_TYPE "BLAST" is written as one tab separated
+    # line with 1-based coordinates. For minus strand hits the subject
+    # begin and end are swapped in the output only; the record passed on
+    # in the stream is left untouched. With the "comment" option a field
+    # header line is written before the first hit. Records are passed
+    # through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $fh, $record, $first, $s_beg, $s_end );
+
+    $fh = &write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    $first = 1;
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( defined $record->{ "REC_TYPE" } and $record->{ "REC_TYPE" } eq "BLAST" )
+        {
+            if ( $options->{ "comment" } and $first )
+            {
+                # The header belongs in the table output, not on STDOUT.
+                print $fh "# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score\n";
+
+                $first = 0;
+            }
+
+            # Work on copies so the streamed record keeps its coordinates.
+            ( $s_beg, $s_end ) = ( $record->{ "S_BEG" }, $record->{ "S_END" } );
+
+            if ( defined $record->{ "STRAND" } and $record->{ "STRAND" } eq "-" ) {
+                ( $s_beg, $s_end ) = ( $s_end, $s_beg );
+            }
+
+            print $fh join( "\t",
+                $record->{ "Q_ID" },
+                $record->{ "S_ID" },
+                $record->{ "IDENT" },
+                $record->{ "ALIGN_LEN" },
+                $record->{ "MISMATCHES" },
+                $record->{ "GAPS" },
+                $record->{ "Q_BEG" } + 1,
+                $record->{ "Q_END" } + 1,
+                $s_beg + 1,
+                $s_end + 1,
+                $record->{ "E_VAL" },
+                $record->{ "BIT_SCORE" }
+            ), "\n";
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    close $fh;
+}
+
+
+sub script_write_tab
+{
+    # Martin A. Hansen, August 2007.
+
+    # Write data as table.
+    #
+    # With the "keys" option, a row is written for every record holding
+    # all of the given keys, in the given order. Without it, all keys of
+    # the record except those listed in "no_keys" are written, ordered by
+    # numeric comparison after stripping the V from keys of the form V<n>.
+    # With the "comment" option a '#' prefixed header line with the key
+    # names is written before the first row. Records are passed through
+    # unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( @header, %excluded );
+
+    $options->{ "delimit" } ||= "\t";
+
+    map { $excluded{ $_ } = 1 } @{ $options->{ "no_keys" } };
+
+    my $table_fh = &write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        my @row;
+
+        if ( $options->{ "keys" } )
+        {
+            my @wanted = @{ $options->{ "keys" } };
+
+            # A record lacking any of the requested keys yields no row.
+            unless ( grep { not exists $rec->{ $_ } } @wanted )
+            {
+                push @header, @wanted if $options->{ "comment" };
+
+                @row = map { $rec->{ $_ } } @wanted;
+            }
+        }
+        else
+        {
+            my @ordered = sort {
+                my ( $left, $right ) = ( $a, $b );
+
+                $left  =~ s/^V(\d+)$/$1/;
+                $right =~ s/^V(\d+)$/$1/;
+
+                $left <=> $right;
+            } keys %{ $rec };
+
+            foreach my $name ( @ordered )
+            {
+                next if exists $excluded{ $name };
+
+                push @header, $name if $options->{ "comment" };
+                push @row, $rec->{ $name };
+            }
+        }
+
+        if ( @header and $options->{ "comment" } )
+        {
+            print $table_fh "#", join( $options->{ "delimit" }, @header ), "\n";
+
+            # The header is written once only.
+            delete $options->{ "comment" };
+        }
+
+        print $table_fh join( $options->{ "delimit" }, @row ), "\n" if @row;
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    close $table_fh;
+}
+
+
+sub script_write_bed
+{
+    # Martin A. Hansen, August 2007.
+
+    # Write BED format for the UCSC genome browser using records in stream.
+    #
+    # BED records are written with their own BED_COLS count. PATSCAN
+    # records with a CHR key are written as 6 column BED. PSL, BLAST and
+    # VMATCH hits whose S_ID starts with "chr" are converted to a 6 column
+    # BED entry (score defaults to 999). Any other record with CHR, CHR_BEG
+    # and CHR_END is written as generic BED. Records are passed through
+    # unless "no_stream" is set.
+
+    # Crude - needs lots of work!
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $bed_fh = &write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        my $type = $rec->{ "REC_TYPE" };
+
+        if ( $type eq "BED" )   # ---- Hits from BED ----
+        {
+            &Maasha::UCSC::bed_put_entry( $rec, $bed_fh, $rec->{ "BED_COLS" } );
+        }
+        elsif ( $type eq "PATSCAN" and $rec->{ "CHR" } )   # ---- Hits from patscan_seq ----
+        {
+            &Maasha::UCSC::bed_put_entry( $rec, $bed_fh, 6 );
+        }
+        elsif ( ( $type eq "PSL" or $type eq "BLAST" or $type eq "VMATCH" ) and $rec->{ "S_ID" } =~ /^chr/i )   # ---- Hits from BLAT (PSL), BLAST and Vmatch ----
+        {
+            my %bed_entry = (
+                "CHR"     => $rec->{ "S_ID" },
+                "CHR_BEG" => $rec->{ "S_BEG" },
+                "CHR_END" => $rec->{ "S_END" },
+                "Q_ID"    => $rec->{ "Q_ID" },
+                "SCORE"   => $rec->{ "SCORE" } || 999,   # or use E_VAL somehow
+                "STRAND"  => $rec->{ "STRAND" },
+            );
+
+            &Maasha::UCSC::bed_put_entry( \%bed_entry, $bed_fh, 6 );
+        }
+        elsif ( $rec->{ "CHR" } and defined $rec->{ "CHR_BEG" } and $rec->{ "CHR_END" } )   # ---- Generic data from tables ----
+        {
+            &Maasha::UCSC::bed_put_entry( $rec, $bed_fh );
+        }
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    close $bed_fh;
+}
+
+
+sub script_write_psl
+{
+    # Martin A. Hansen, August 2007.
+
+    # Write PSL output from stream.
+    #
+    # Every record with REC_TYPE "PSL" is written as a PSL entry; the PSL
+    # header is written just before the first such entry. Records are
+    # passed through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $header_pending = 1;
+
+    my $psl_fh = &write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+
+        next unless $rec->{ "REC_TYPE" } and $rec->{ "REC_TYPE" } eq "PSL";
+
+        if ( $header_pending )
+        {
+            &Maasha::UCSC::psl_put_header( $psl_fh );
+        }
+
+        &Maasha::UCSC::psl_put_entry( $rec, $psl_fh );
+
+        $header_pending = 0;
+    }
+
+    close $psl_fh;
+}
+
+
+sub script_write_2bit
+{
+    # Martin A. Hansen, March 2008.
+
+    # Write sequence entries from stream in 2bit format.
+    #
+    # Sequences are first written to a temporary FASTA file, which is then
+    # converted with Maasha::TwoBit::fasta2twobit into the stream opened
+    # from the "data_out" option. The mask flag is passed unless "no_mask"
+    # is set. Records are passed through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $use_mask;
+
+    $use_mask = 1 unless $options->{ "no_mask" };
+
+    my $fasta_path = "$TMP_DIR/write_2bit.fna";
+    my $fasta_out  = &Maasha::Common::write_open( $fasta_path );
+
+    my $twobit_fh = &write_stream( $options->{ "data_out" } );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        &Maasha::Fasta::put_entry( [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ], $fasta_out ) if $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" };
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    close $fasta_out;
+
+    # Reopen the temporary FASTA file for reading and convert it.
+    my $fasta_in = &Maasha::Common::read_open( $fasta_path );
+
+    &Maasha::TwoBit::fasta2twobit( $fasta_in, $twobit_fh, $use_mask );
+
+    close $fasta_in;
+    close $twobit_fh;
+
+    unlink $fasta_path;
+}
+
+
+sub script_write_solid
+{
+    # Martin A. Hansen, April 2008.
+
+    # Write di-base encoded Solid sequence from entries in stream.
+    #
+    # Each sequence is converted with Maasha::Solid::seq2color_space and
+    # written as a FASTA entry to the stream opened from the "data_out"
+    # and "compress" options, wrapped according to "wrap". Records are
+    # passed through unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $solid_fh = &write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        if ( $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" } )
+        {
+            my $color_space = &Maasha::Solid::seq2color_space( $rec->{ "SEQ" } );
+
+            &Maasha::Fasta::put_entry( [ $rec->{ "SEQ_NAME" }, $color_space ], $solid_fh, $options->{ "wrap" } );
+        }
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    close $solid_fh;
+}
+
+
+sub script_plot_seqlogo
+{
+    # Martin A. Hansen, August 2007.
+
+    # Calculates and writes a sequence logo for alignments.
+    #
+    # All sequences in the stream are collected and handed to
+    # Maasha::Plot::seq_logo; the result is printed to the stream opened
+    # from the "data_out" option. Records are passed through unless
+    # "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my @seqs;
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        push @seqs, [ $rec->{ "SEQ_NAME" }, $rec->{ "SEQ" } ] if $rec->{ "SEQ_NAME" } and $rec->{ "SEQ" };
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    my $logo_data = &Maasha::Plot::seq_logo( \@seqs );
+
+    # The output stream is opened only after the whole input was read.
+    my $logo_fh = &write_stream( $options->{ "data_out" } );
+
+    print $logo_fh $logo_data;
+
+    close $logo_fh;
+}
+
+
+sub script_plot_phastcons_profiles
+{
+    # Martin A. Hansen, January 2008.
+
+    # Plots PhastCons profiles.
+    #
+    # For every record with CHR, CHR_BEG and CHR_END the PhastCons scores
+    # of that interval (extended by the "flank" option) are looked up for
+    # the genome given by the "genome" option. The profiles are normalized,
+    # optionally reduced to their mean and/or median, flipped and plotted
+    # with Maasha::Plot::lineplot_simple. The plot lines are printed to the
+    # stream opened from the "data_out" option. Records are passed through
+    # unless "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my $profiles;
+
+    $options->{ "title" } ||= "PhastCons Profiles";
+
+    my $score_path = &Maasha::Config::genome_phastcons( $options->{ "genome" } );
+    my $index_path = &Maasha::Config::genome_phastcons_index( $options->{ "genome" } );
+
+    my $score_index = &Maasha::UCSC::phastcons_index_retrieve( $index_path );
+    my $score_fh    = &Maasha::Common::read_open( $score_path );
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        if ( $rec->{ "CHR" } and $rec->{ "CHR_BEG" } and $rec->{ "CHR_END" } )
+        {
+            my $interval_scores = &Maasha::UCSC::phastcons_index_lookup( $score_index, $score_fh, $rec->{ "CHR" }, $rec->{ "CHR_BEG" }, $rec->{ "CHR_END" }, $options->{ "flank" } );
+
+            # Store a copy of the scores as one row of the profile matrix.
+            push @{ $profiles }, [ @{ $interval_scores } ];
+        }
+
+        &put_record( $rec, $out ) unless $options->{ "no_stream" };
+    }
+
+    &Maasha::UCSC::phastcons_normalize( $profiles );
+
+    if ( $options->{ "mean" } ) {
+        $profiles = [ [ &Maasha::UCSC::phastcons_mean( $profiles ) ] ];
+    }
+
+    if ( $options->{ "median" } ) {
+        $profiles = [ [ &Maasha::UCSC::phastcons_median( $profiles ) ] ];
+    }
+
+    $profiles = &Maasha::Matrix::matrix_flip( $profiles );
+
+    my $plot_lines = &Maasha::Plot::lineplot_simple( $profiles, $options, $TMP_DIR );
+
+    my $plot_fh = &write_stream( $options->{ "data_out" } );
+
+    foreach my $plot_line ( @{ $plot_lines } ) {
+        print $plot_fh "$plot_line\n";
+    }
+
+    close $plot_fh;
+}
+
+
+sub script_analyze_bed
+{
+    # Martin A. Hansen, March 2008.
+
+    # Analyze BED entries in stream.
+    #
+    # Records with REC_TYPE "BED" are replaced by the result of
+    # Maasha::UCSC::bed_analyze; all records are written to the out stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        if ( $rec->{ "REC_TYPE" } eq "BED" ) {
+            $rec = &Maasha::UCSC::bed_analyze( $rec );
+        }
+
+        &put_record( $rec, $out );
+    }
+}
+
+
+sub script_analyze_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Analyze values for given keys in stream.
+    #
+    # For every key (or only those listed in the "keys" option) the number
+    # of occurrences, the sum, minimum, maximum and mean are collected.
+    # Numeric values are used as they are; for all other values the string
+    # length is used instead. The type reported for a key is that of the
+    # last value seen. The result is printed to the out stream as a table
+    # with one column per key. Records are passed through unless
+    # "no_stream" is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, @keys, @wanted, %key_hash, $analysis, $stat, $val );
+
+    @wanted = @{ $options->{ "keys" } || [] };
+
+    $key_hash{ $_ } = 1 foreach @wanted;
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( keys %{ $record } )
+        {
+            next if @wanted and not exists $key_hash{ $key };
+
+            $stat = $analysis->{ $key } ||= {};
+
+            $stat->{ "COUNT" }++;
+
+            if ( &Maasha::Calc::is_a_number( $record->{ $key } ) )
+            {
+                $stat->{ "TYPE" } = "num";
+
+                $val = $record->{ $key };
+            }
+            else
+            {
+                # Non-numeric values are analyzed by their length.
+                $stat->{ "TYPE" } = "alph";
+
+                $val = length $record->{ $key };
+            }
+
+            $stat->{ "SUM" } += $val;
+
+            # Test for definedness so that a legitimate 0 is not taken as unset.
+            $stat->{ "MAX" } = $val if not defined $stat->{ "MAX" } or $val > $stat->{ "MAX" };
+            $stat->{ "MIN" } = $val if not defined $stat->{ "MIN" } or $val < $stat->{ "MIN" };
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    foreach $key ( keys %{ $analysis } )
+    {
+        $analysis->{ $key }->{ "MEAN" } = sprintf "%.2f", $analysis->{ $key }->{ "SUM" } / $analysis->{ $key }->{ "COUNT" };
+        $analysis->{ $key }->{ "SUM" }  = sprintf "%.2f", $analysis->{ $key }->{ "SUM" };
+    }
+
+    my ( $keys, $types, $counts, $mins, $maxs, $sums, $means );
+
+    $keys = "KEY ";
+    $types = "TYPE ";
+    $counts = "COUNT";
+    $mins = "MIN ";
+    $maxs = "MAX ";
+    $sums = "SUM ";
+    $means = "MEAN ";
+
+    # Keep the column order of the "keys" option when it is given.
+    if ( @wanted ) {
+        @keys = @wanted;
+    } else {
+        @keys = keys %{ $analysis };
+    }
+
+    foreach $key ( @keys )
+    {
+        $keys   .= sprintf "% 15s", $key;
+        $types  .= sprintf "% 15s", $analysis->{ $key }->{ "TYPE" };
+        $counts .= sprintf "% 15s", $analysis->{ $key }->{ "COUNT" };
+        $mins   .= sprintf "% 15s", $analysis->{ $key }->{ "MIN" };
+        $maxs   .= sprintf "% 15s", $analysis->{ $key }->{ "MAX" };
+        $sums   .= sprintf "% 15s", $analysis->{ $key }->{ "SUM" };
+        $means  .= sprintf "% 15s", $analysis->{ $key }->{ "MEAN" };
+    }
+
+    print $out "$keys\n";
+    print $out "$types\n";
+    print $out "$counts\n";
+    print $out "$mins\n";
+    print $out "$maxs\n";
+    print $out "$sums\n";
+    print $out "$means\n";
+}
+
+
+sub script_head_records
+{
+    # Martin A. Hansen, August 2007.
+
+    # Display the first records in stream.
+    #
+    # Records are written to the out stream until the number given by the
+    # "num" option (default 10) has been written; reading stops there.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    $options->{ "num" } ||= 10;
+
+    my $written = 0;
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        &put_record( $rec, $out );
+
+        $written += 1;
+
+        last if $written == $options->{ "num" };
+    }
+}
+
+
+sub script_remove_keys
+{
+    # Martin A. Hansen, August 2007.
+
+    # Remove keys from stream.
+    #
+    # With the "keys" option the listed keys are deleted from every
+    # record. Otherwise, with the "save_keys" option, every record is
+    # reduced to the listed keys it actually holds. Records left without
+    # any keys are not written to the out stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $new_record, $key );
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $options->{ "keys" } )
+        {
+            delete $record->{ $_ } foreach @{ $options->{ "keys" } };
+        }
+        elsif ( $options->{ "save_keys" } )
+        {
+            # Start from an empty record each time so that values from
+            # previous records cannot leak into this one.
+            $new_record = {};
+
+            foreach $key ( @{ $options->{ "save_keys" } } ) {
+                $new_record->{ $key } = $record->{ $key } if exists $record->{ $key };
+            }
+
+            $record = $new_record;
+        }
+
+        &put_record( $record, $out ) if keys %{ $record };
+    }
+}
+
+
+sub script_rename_keys
+{
+    # Martin A. Hansen, August 2007.
+
+    # Rename keys in stream.
+    #
+    # The "keys" option holds the old key name followed by the new one.
+    # In every record holding the old key, its value is stored under the
+    # new key and the old key is deleted. All records are written to the
+    # out stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        my $old_name = $options->{ "keys" }->[ 0 ];
+
+        if ( exists $rec->{ $old_name } )
+        {
+            my $new_name = $options->{ "keys" }->[ 1 ];
+
+            # Copy first, delete afterwards.
+            $rec->{ $new_name } = $rec->{ $old_name };
+
+            delete $rec->{ $old_name };
+        }
+
+        &put_record( $rec, $out );
+    }
+}
+
+
+sub script_uniq_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Find unique values in stream.
+    #
+    # Records are filtered on the value of the key named by the "key"
+    # option: a record is written the first time its value is seen and
+    # dropped on later occurrences. With the "invert" option it is the
+    # other way round - only records whose value was seen before are
+    # written. Records without a true value for the key are always written.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my %seen;
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        my $val = $rec->{ $options->{ "key" } };
+
+        if ( not $val )
+        {
+            &put_record( $rec, $out );
+
+            next;
+        }
+
+        my $known = $seen{ $val };
+        my $emit  = $options->{ "invert" } ? $known : not $known;
+
+        &put_record( $rec, $out ) if $emit;
+
+        $seen{ $val } = 1;
+    }
+}
+
+
+sub script_merge_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Merge values of keys in stream.
+    #
+    # In every record holding the first key of the "keys" option, the
+    # value of that key is replaced by its own value joined with the
+    # values of the remaining listed keys present in the record. The
+    # values are joined with the "delimit" option (default '_'). All
+    # records are written to the out stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    $options->{ "delimit" } ||= '_';
+
+    while ( my $rec = &get_record( $in ) )
+    {
+        my $target = $options->{ "keys" }->[ 0 ];
+
+        if ( exists $rec->{ $target } )
+        {
+            my @names = @{ $options->{ "keys" } };
+
+            # Values of the other keys, skipping keys the record lacks.
+            my @others = map { $rec->{ $_ } } grep { exists $rec->{ $_ } } @names[ 1 .. $#names ];
+
+            $rec->{ $target } = join $options->{ "delimit" }, $rec->{ $target }, @others;
+        }
+
+        &put_record( $rec, $out );
+    }
+}
+
+
+sub script_grab
+{
+ # Martin A. Hansen, August 2007.
+
+ # Grab for records in stream.
+
+ # Writes the records that match a search criterion to the out stream,
+ # or - with the "invert" option - the records that do not match.
+ # Exactly one of four matching modes is applied per record, in this
+ # order of precedence:
+ #
+ # 1. exact lookup - strings read from the "exact_in" file
+ # 2. substring - "patterns" option or "patterns_in" file
+ # 3. regex - "regex" option ("case_insensitive" adds /i)
+ # 4. eval - "eval" option of the form: KEY OPERATOR VALUE
+ #
+ # In modes 1-3 the "keys" option restricts matching to the values of
+ # the listed keys. Without it all keys and values of the record are
+ # tested, unless limited by "keys_only" or "vals_only".
+
+ my ( $in, # handle to in stream
+ $out, # handle to out stream
+ $options, # options hash
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $patterns, $pattern, $record, $key, $pos, $op, $val, %lookup_hash );
+
+ # "patterns" is a comma separated list given directly as an option.
+ if ( $options->{ "patterns" } )
+ {
+ $patterns = [ split ",", $options->{ "patterns" } ];
+ }
+ elsif ( -f $options->{ "patterns_in" } )
+ {
+ $patterns = &Maasha::Patscan::read_patterns( $options->{ "patterns_in" } );
+ }
+ elsif ( -f $options->{ "exact_in" } )
+ {
+ $patterns = &Maasha::Patscan::read_patterns( $options->{ "exact_in" } );
+
+ map { $lookup_hash{ $_ } = 1 } @{ $patterns };
+
+ # Exact strings live in %lookup_hash only; clearing $patterns keeps
+ # the substring mode below from being selected as well.
+ undef $patterns;
+ }
+
+ # Split the eval expression into key, operator and value once, up front.
+ if ( $options->{ "eval" } )
+ {
+ if ( $options->{ "eval" } =~ /^([^><=! ]+)\s*(>=|<=|>|<|=|!=|eq|ne)\s*(.+)$/ )
+ {
+ $key = $1;
+ $op = $2;
+ $val = $3;
+ }
+ }
+
+ while ( $record = &get_record( $in ) )
+ {
+ # $pos is -1 until something matches; any value >= 0 means a match.
+ $pos = -1;
+
+ if ( %lookup_hash )
+ {
+ # Mode 1: exact lookup of whole keys/values.
+ if ( $options->{ "keys" } )
+ {
+ foreach $key ( @{ $options->{ "keys" } } )
+ {
+ if ( exists $lookup_hash{ $record->{ $key } } )
+ {
+ $pos = 1;
+ goto FOUND;
+ }
+ }
+ }
+ else
+ {
+ foreach $key ( keys %{ $record } )
+ {
+ if ( not $options->{ "vals_only" } )
+ {
+ if ( exists $lookup_hash{ $key } )
+ {
+ $pos = 1;
+ goto FOUND;
+ }
+ }
+
+ if ( not $options->{ "keys_only" } )
+ {
+ if ( exists $lookup_hash{ $record->{ $key } } )
+ {
+ $pos = 1;
+ goto FOUND;
+ }
+ }
+ }
+ }
+ }
+ elsif ( $patterns )
+ {
+ # Mode 2: plain substring search with index, one pattern at a time.
+ foreach $pattern ( @{ $patterns } )
+ {
+ if ( $options->{ "keys" } )
+ {
+ foreach $key ( @{ $options->{ "keys" } } )
+ {
+ $pos = index $record->{ $key }, $pattern;
+
+ goto FOUND if $pos >= 0;
+ }
+ }
+ else
+ {
+ foreach $key ( keys %{ $record } )
+ {
+ if ( not $options->{ "vals_only" } )
+ {
+ $pos = index $key, $pattern;
+
+ goto FOUND if $pos >= 0;
+ }
+
+ if ( not $options->{ "keys_only" } )
+ {
+ $pos = index $record->{ $key }, $pattern;
+
+ goto FOUND if $pos >= 0;
+ }
+ }
+ }
+ }
+ }
+ elsif ( $options->{ "regex" } )
+ {
+ # Mode 3: regular expression match.
+ if ( $options->{ "keys" } )
+ {
+ foreach $key ( @{ $options->{ "keys" } } )
+ {
+ if ( $options->{ "case_insensitive" } ) {
+ $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/i;
+ } else {
+ $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/;
+ }
+
+ goto FOUND if $pos >= 0;
+ }
+ }
+ else
+ {
+ foreach $key ( keys %{ $record } )
+ {
+ if ( not $options->{ "vals_only" } )
+ {
+ if ( $options->{ "case_insensitive" } ) {
+ $pos = 1 if $key =~ /$options->{'regex'}/i;
+ } else {
+ $pos = 1 if $key =~ /$options->{'regex'}/;
+ }
+
+ goto FOUND if $pos >= 0;
+ }
+
+ if ( not $options->{ "keys_only" } )
+ {
+ if ( $options->{ "case_insensitive" } ) {
+ $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/i;
+ } else {
+ $pos = 1 if $record->{ $key } =~ /$options->{'regex'}/;
+ }
+
+ goto FOUND if $pos >= 0;
+ }
+ }
+ }
+ }
+ elsif ( $options->{ "eval" } )
+ {
+ # Mode 4: compare the value of the key parsed from the eval option
+ # against the parsed value using the parsed operator. "=" and the
+ # angle bracket operators compare numerically, eq/ne as strings.
+ if ( defined $record->{ $key } )
+ {
+ if ( $op eq "<" and $record->{ $key } < $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq ">" and $record->{ $key } > $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq ">=" and $record->{ $key } >= $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq "<=" and $record->{ $key } <= $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq "=" and $record->{ $key } == $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq "!=" and $record->{ $key } != $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq "eq" and $record->{ $key } eq $val ) {
+ $pos = 1 and goto FOUND;
+ } elsif ( $op eq "ne" and $record->{ $key } ne $val ) {
+ $pos = 1 and goto FOUND;
+ }
+ }
+ }
+
+ # All modes end up here, with or without a match.
+ FOUND:
+
+ if ( $pos >= 0 and not $options->{ "invert" } ) {
+ &put_record( $record, $out );
+ } elsif ( $pos < 0 and $options->{ "invert" } ) {
+ &put_record( $record, $out );
+ }
+ }
+}
+
+
+sub script_compute
+{
+    # Martin A. Hansen, August 2007.
+
+    # Evaluate an expression for every record in the stream. The "eval"
+    # option holds an assignment of the form "KEY = EXPRESSION". Every word
+    # in EXPRESSION that does not consist of digits only is taken to be a
+    # record key and is replaced by that record's value before EXPRESSION is
+    # evaluated as Perl code. The result is stored in the record under KEY.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $eval_key, $eval_val, $result, $lookup );
+
+    # Maps one word of the expression to its replacement text: numbers are
+    # kept as they are, anything else is looked up in the current record
+    # (a missing key becomes the empty string).
+    $lookup = sub {
+        my ( $word ) = @_;
+
+        return $word if $word =~ /^\d+$/;
+        return defined $record->{ $word } ? $record->{ $word } : "";
+    };
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $options->{ "eval" } )
+        {
+            # The key is captured non-greedily so the expression is split at
+            # the first "=" and no surrounding whitespace ends up in the key.
+            if ( $options->{ "eval" } =~ /^\s*(.+?)\s*=\s*(.+)$/ )
+            {
+                $eval_key = $1;
+                $eval_val = $2;
+            }
+            else
+            {
+                &Maasha::Common::error( qq(Bad eval expression -> $options->{ "eval" }) );
+            }
+
+            # All words are replaced in a single pass, so a substituted value
+            # is never scanned for key names and a key never matches inside
+            # a longer word.
+            $eval_val =~ s/(\w+)/$lookup->( $1 )/ge;
+
+            $result = eval $eval_val;
+
+            # A false result (0 or "") is legitimate - only an error from
+            # eval, or no result at all, counts as a failure.
+            &Maasha::Common::error( "eval failed -> $@" ) if $@ or not defined $result;
+
+            $record->{ $eval_key } = $result;
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_flip_tab
+{
+    # Martin A. Hansen, June 2008.
+
+    # Flip a table. Every record read is one row; its values are taken in
+    # numerical order of the keys after a leading "V" has been stripped
+    # (V0, V1, ...). The rows are handed to Maasha::Matrix::matrix_flip and
+    # each row of the result is emitted as a record with keys V0, V1, ...
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, $A, $B, @rows, @matrix, $row, $i );
+
+    while ( $record = &get_record( $in ) )
+    {
+        undef @rows;
+
+        foreach $key ( sort { $A = $a; $B = $b; $A =~ s/^V(\d+)$/$1/; $B =~ s/^V(\d+)$/$1/; $A <=> $B } keys %{ $record } )
+        {
+            push @rows, $record->{ $key };
+        }
+
+        push @matrix, [ @rows ];
+    }
+
+    @matrix = &Maasha::Matrix::matrix_flip( \@matrix );
+
+    foreach $row ( @matrix )
+    {
+        # Start from an empty record for every row so that cells from a
+        # longer, earlier row cannot leak into a shorter one.
+        $record = {};
+
+        for ( $i = 0; $i < @{ $row }; $i++ ) {
+            $record->{ "V$i" } = $row->[ $i ];
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_add_ident
+{
+    # Martin A. Hansen, May 2008.
+
+    # Add a unique identifier to each record in stream. The identifier is
+    # the "prefix" option (default "ID") followed by a zero padded, eight
+    # digit running number starting at 0, and is stored under the key given
+    # by the "key" option (default "ID").
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, $prefix, $i );
+
+    $key    = $options->{ "key" }    || "ID";
+    $prefix = $options->{ "prefix" } || "ID";
+
+    $i = 0;
+
+    while ( $record = &get_record( $in ) )
+    {
+        # The prefix is kept out of the format string so that a "%" in a
+        # user supplied prefix is not interpreted as a conversion.
+        $record->{ $key } = $prefix . sprintf( "%08d", $i );
+
+        &put_record( $record, $out );
+
+        $i++;
+    }
+}
+
+
+sub script_count_records
+{
+    # Martin A. Hansen, August 2007.
+
+    # Count the records in the stream and write the total as a record with
+    # the key "count_records" to the stream opened for the "data_out"
+    # option. With the "no_stream" option the input is read line by line
+    # and only the "---" record separators are counted; otherwise every
+    # record is parsed and passed on to the out stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $line, $total, $fh );
+
+    $total = 0;
+
+    if ( not $options->{ "no_stream" } )
+    {
+        while ( $record = &get_record( $in ) )
+        {
+            &put_record( $record, $out );
+
+            $total += 1;
+        }
+    }
+    else
+    {
+        while ( $line = <$in> )
+        {
+            chomp $line;
+
+            next if $line ne "---";
+
+            $total += 1;
+        }
+    }
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    &put_record( { "count_records" => $total }, $fh );
+
+    close $fh;
+}
+
+
+sub script_random_records
+{
+    # Martin A. Hansen, August 2007.
+
+    # Pick a number of random records from the stream. All records are
+    # first written to a temporary file and counted, then "num" (default
+    # 10) distinct record numbers are drawn, and finally the temporary
+    # file is read back and the records with those numbers are emitted.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $tmp_file, $fh_out, $fh_in, $total, $index, %picked, $pick, $highest );
+
+    $options->{ "num" } ||= 10;
+
+    $tmp_file = "$TMP_DIR/random_records.tmp";
+
+    $fh_out = &Maasha::Common::write_open( $tmp_file );
+
+    $total = 0;
+
+    while ( $record = &get_record( $in ) )
+    {
+        &put_record( $record, $fh_out );
+
+        $total++;
+    }
+
+    close $fh_out;
+
+    &Maasha::Common::error( qq(Requested random records > records in stream) ) if $options->{ "num" } > $total;
+
+    # Draw until enough distinct record numbers have been collected, and
+    # remember the highest one so reading can stop early.
+    $highest = 0;
+
+    while ( scalar( keys %picked ) < $options->{ "num" } )
+    {
+        $pick = int( rand( $total ) );
+
+        $picked{ $pick } = 1;
+
+        $highest = $pick if $pick > $highest;
+    }
+
+    $fh_in = &Maasha::Common::read_open( $tmp_file );
+
+    for ( $index = 0; $record = &get_record( $fh_in ); $index++ )
+    {
+        &put_record( $record, $out ) if exists $picked{ $index };
+
+        last if $index == $highest;
+    }
+
+    close $fh_in;
+
+    unlink $tmp_file;
+}
+
+
+sub script_sort_records
+{
+    # Martin A. Hansen, August 2007.
+
+    # Sort the records in the stream according to the "keys" option. A key
+    # with a trailing "n" is compared numerically (the "n" is not part of
+    # the key name), all other keys are compared as strings. Later keys
+    # only decide the order when all earlier keys compare equal. With the
+    # "reverse" option the sorted records are emitted in reverse order.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $key, @compare, $sort_sub, @records, $record );
+
+    # One comparison closure is built per key instead of evaluating a
+    # string of Perl code, so key names are never executed as code.
+    foreach $key ( @{ $options->{ "keys" } } )
+    {
+        my $field = $key;    # private copy - the option list is left untouched
+
+        if ( $field =~ s/n$// ) {
+            push @compare, sub { $_[ 0 ]->{ $field } <=> $_[ 1 ]->{ $field } };
+        } else {
+            push @compare, sub { $_[ 0 ]->{ $field } cmp $_[ 1 ]->{ $field } };
+        }
+    }
+
+    # The first comparison that finds a difference decides the order.
+    $sort_sub = sub {
+        my ( $cmp, $result );
+
+        foreach $cmp ( @compare )
+        {
+            $result = $cmp->( $a, $b );
+
+            return $result if $result;
+        }
+
+        return 0;
+    };
+
+    while ( $record = &get_record( $in ) ) {
+        push @records, $record;
+    }
+
+    @records = sort $sort_sub @records;
+    @records = reverse @records if $options->{ "reverse" };
+
+    foreach $record ( @records ) {
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_count_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Count how often each value of the keys in the "keys" option occurs in
+    # the stream, and emit every record with the number of occurrences of
+    # its own value added under KEY_COUNT. Records are held in memory; each
+    # time more than 5,000,000 have piled up they are moved to a temporary
+    # file, which is read back and emitted before the records in memory.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $num, $record, $key, $held, %count_hash, @records, $tmp_file, $fh_out, $fh_in, $cache, $add_counts );
+
+    # Adds the KEY_COUNT values to one record.
+    $add_counts = sub {
+        my ( $rec ) = @_;
+        my ( $name );
+
+        foreach $name ( @{ $options->{ "keys" } } ) {
+            $rec->{ $name . "_COUNT" } = $count_hash{ $name }{ $rec->{ $name } } if exists $rec->{ $name };
+        }
+    };
+
+    $tmp_file = "$TMP_DIR/count_cache.tmp";
+
+    $fh_out = &Maasha::Common::write_open( $tmp_file );
+
+    $num = 0;
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } ) {
+            $count_hash{ $key }{ $record->{ $key } }++ if exists $record->{ $key };
+        }
+
+        push @records, $record;
+
+        if ( scalar @records > 5_000_000 )    # too many records to hold in memory - use disk cache
+        {
+            foreach $held ( @records ) {
+                &put_record( $held, $fh_out );
+            }
+
+            undef @records;
+
+            $cache = 1;
+        }
+
+        print STDERR "verbose: records read $num\n" if ( $options->{ 'verbose' } and ( $num % 1_000_000 ) == 0 );
+
+        $num++;
+    }
+
+    close $fh_out;
+
+    if ( $cache )
+    {
+        $num = 0;
+
+        $fh_in = &Maasha::Common::read_open( $tmp_file );
+
+        while ( $record = &get_record( $fh_in ) )
+        {
+            $add_counts->( $record );
+
+            &put_record( $record, $out );
+
+            print STDERR "verbose: cache read $num\n" if ( $options->{ 'verbose' } and ( $num % 1_000_000 ) == 0 );
+
+            $num++;
+        }
+
+        close $fh_in;
+    }
+
+    foreach $record ( @records )
+    {
+        $add_counts->( $record );
+
+        &put_record( $record, $out );
+    }
+
+    unlink $tmp_file;
+}
+
+
+sub script_plot_histogram
+{
+    # Martin A. Hansen, September 2007.
+
+    # Plot a simple histogram of the values found under the key given by
+    # the "key" option. Values that are false in Perl are not counted. The
+    # bins are sorted numerically when the "sort" option is "num" (the
+    # default) and as strings otherwise. The lines returned by
+    # Maasha::Plot::histogram_simple are written to the "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $value, %data_hash, @bins, @data_list, $result, $line, $fh );
+
+    $options->{ "title" } ||= "Histogram";
+    $options->{ "sort" }  ||= "num";
+
+    while ( $record = &get_record( $in ) )
+    {
+        $value = $record->{ $options->{ "key" } };
+
+        $data_hash{ $value }++ if $value;
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    if ( $options->{ "sort" } eq "num" ) {
+        @bins = sort { $a <=> $b } keys %data_hash;
+    } else {
+        @bins = sort keys %data_hash;
+    }
+
+    @data_list = map { [ $_, $data_hash{ $_ } ] } @bins;
+
+    $result = &Maasha::Plot::histogram_simple( \@data_list, $options );
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    foreach $line ( @{ $result } ) {
+        print $fh "$line\n";
+    }
+
+    close $fh;
+}
+
+
+sub script_plot_lendist
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plot length distribution using GNU plot. The lengths are read from
+    # the key given by the "key" option; lengths that are false in Perl are
+    # not counted. One bin is made for every length from 0 up to and
+    # including the longest one seen, and the lines returned by
+    # Maasha::Plot::histogram_lendist are written to the "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, %data_hash, $max, @data_list, $i, $result, $fh );
+
+    $options->{ "title" } ||= "Length Distribution";
+
+    while ( $record = &get_record( $in ) )
+    {
+        $data_hash{ $record->{ $options->{ "key" } } }++ if $record->{ $options->{ "key" } };
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    # Without any lengths there is no maximum and no bins are made.
+    if ( %data_hash )
+    {
+        $max = &Maasha::Calc::list_max( [ keys %data_hash ] );
+
+        # "<=" so that the bin holding the longest length is included.
+        for ( $i = 0; $i <= $max; $i++ ) {
+            push @data_list, [ $i, $data_hash{ $i } || 0 ];
+        }
+    }
+
+    $result = &Maasha::Plot::histogram_lendist( \@data_list, $options );
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    print $fh "$_\n" foreach @{ $result };
+
+    close $fh;
+}
+
+
+sub script_plot_chrdist
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plot chromosome distribution using GNU plot. The chromosome name is
+    # read from CHR or, for PATSCAN, PSL and BLAST records, from an S_ID
+    # that begins with "chr". The names are ordered by a numeric sort key
+    # derived from the name, and the lines returned by
+    # Maasha::Plot::histogram_chrdist are written to the "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $name, %data_hash, @data_list, $sort_key, $count, $result, $line, $fh );
+
+    $options->{ "title" } ||= "Chromosome Distribution";
+
+    while ( $record = &get_record( $in ) )
+    {
+        undef $name;
+
+        if ( $record->{ "CHR" } ) {    # generic
+            $name = $record->{ "CHR" };
+        } elsif ( $record->{ "REC_TYPE" } eq "PATSCAN" and $record->{ "S_ID" } =~ /^chr/i ) {    # patscan
+            $name = $record->{ "S_ID" };
+        } elsif ( $record->{ "REC_TYPE" } eq "PSL" and $record->{ "S_ID" } =~ /^chr/i ) {    # BLAT / PSL
+            $name = $record->{ "S_ID" };
+        } elsif ( $record->{ "REC_TYPE" } eq "BLAST" and $record->{ "S_ID" } =~ /^chr/i ) {    # BLAST
+            $name = $record->{ "S_ID" };
+        }
+
+        $data_hash{ $name }++ if defined $name;
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    foreach $name ( keys %data_hash )
+    {
+        $sort_key = $name;
+
+        $sort_key =~ s/chr//i;
+
+        # Names starting with a letter get a numeric prefix in their place.
+        $sort_key =~ s/^X(.*)/99$1/;
+        $sort_key =~ s/^Y(.*)/99$1/;
+        $sort_key =~ s/^Z(.*)/999$1/;
+        $sort_key =~ s/^M(.*)/9999$1/;
+        $sort_key =~ s/^U(.*)/99999$1/;
+
+        # Everything from the first underscore on is replaced by one
+        # "999999" per underscore in the name.
+        $count = $sort_key =~ tr/_//;
+
+        $sort_key =~ s/_.*/"999999" x $count/ex;
+
+        push @data_list, [ $name, $data_hash{ $name }, $sort_key ];
+    }
+
+    @data_list = sort { $a->[ 2 ] <=> $b->[ 2 ] } @data_list;
+
+    $result = &Maasha::Plot::histogram_chrdist( \@data_list, $options );
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    foreach $line ( @{ $result } ) {
+        print $fh "$line\n";
+    }
+
+    close $fh;
+}
+
+
+sub script_plot_karyogram
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plot hits on karyogram. Every record with true CHR, CHR_BEG and
+    # CHR_END values contributes one [ begin, end, color ] entry for its
+    # chromosome, the color being the "feat_color" option (default
+    # "black"). The "genome" option defaults to "human". The output of
+    # Maasha::Plot::karyogram is written to the "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $fh, $result, %data_hash );
+
+    $options->{ "genome" }     ||= "human";
+    $options->{ "feat_color" } ||= "black";
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "CHR_END" } )
+        {
+            push @{ $data_hash{ $record->{ "CHR" } } }, [ $record->{ "CHR_BEG" }, $record->{ "CHR_END" }, $options->{ "feat_color" } ];
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    # The caller's options (with the defaults filled in above) are passed
+    # on - not an unrelated, empty hash.
+    $result = &Maasha::Plot::karyogram( \%data_hash, $options );
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    print $fh $result;
+
+    close $fh;
+}
+
+
+sub script_plot_matches
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plot matches in 2D generating a dotplot. Records with defined Q_BEG
+    # and S_BEG and true Q_END and S_END values are collected and handed to
+    # Maasha::Plot::dotplot_matches. The axis labels default to the Q_ID
+    # and S_ID of the first collected record. The lines returned are
+    # written to the "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, @data, $fh, $result );
+
+    $options->{ "direction" } ||= "both";
+
+    while ( $record = &get_record( $in ) )
+    {
+        if ( defined $record->{ "Q_BEG" } and defined $record->{ "S_BEG" } and $record->{ "Q_END" } and $record->{ "S_END" } ) {
+            push @data, $record;
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    $options->{ "title" } ||= "plot_matches";
+
+    # Only look at the first match when there is one - dereferencing
+    # $data[ 0 ] on an empty list would create a bogus empty record.
+    if ( @data )
+    {
+        $options->{ "xlabel" } ||= $data[ 0 ]->{ "Q_ID" };
+        $options->{ "ylabel" } ||= $data[ 0 ]->{ "S_ID" };
+    }
+
+    $result = &Maasha::Plot::dotplot_matches( \@data, $options, $TMP_DIR );
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    print $fh "$_\n" foreach @{ $result };
+
+    close $fh;
+}
+
+
+sub script_length_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Determine the length of the value for given keys. For every key in
+    # the "keys" option that has a defined value in a record, the length
+    # of that value is added to the record under KEY_LEN.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key );
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } )
+        {
+            # Test for definedness, not truth - the value "0" has length 1.
+            if ( defined $record->{ $key } ) {
+                $record->{ $key . "_LEN" } = length $record->{ $key };
+            }
+        }
+
+        &put_record( $record, $out );
+    }
+}
+
+
+sub script_sum_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Add up the values of every key in the "keys" option over all records
+    # in the stream and write one record per key, KEY_SUM, to the
+    # "data_out" stream. A key without any values gets the sum 0.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, %sum_hash, $fh );
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } )
+        {
+            next if not $record->{ $key };
+
+            $sum_hash{ $key } += $record->{ $key };
+        }
+
+        if ( not $options->{ "no_stream" } ) {
+            &put_record( $record, $out );
+        }
+    }
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    foreach $key ( @{ $options->{ "keys" } } ) {
+        &put_record( { $key . "_SUM" => $sum_hash{ $key } || 0 }, $fh );
+    }
+
+    close $fh;
+}
+
+
+sub script_mean_vals
+{
+    # Martin A. Hansen, August 2007.
+
+    # Calculate the mean of values of given keys. For every key in the
+    # "keys" option one record, KEY_MEAN, is written to the "data_out"
+    # stream holding the mean with two decimals, or "N/A" when no record
+    # had a value for that key.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, %sum_hash, %count_hash, $mean, $fh );
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } )
+        {
+            # Test for definedness, not truth: a value of 0 adds nothing to
+            # the sum but must still be counted, or the mean is too high.
+            if ( defined $record->{ $key } )
+            {
+                $sum_hash{ $key } += $record->{ $key };
+                $count_hash{ $key }++;
+            }
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    foreach $key ( @{ $options->{ "keys" } } )
+    {
+        if ( $count_hash{ $key } ) {
+            $mean = sprintf( "%.2f", ( $sum_hash{ $key } / $count_hash{ $key } ) );
+        } else {
+            $mean = "N/A";
+        }
+
+        &put_record( { $key . "_MEAN" => $mean }, $fh );
+    }
+
+    close $fh;
+}
+
+
+sub script_median_vals
+{
+    # Martin A. Hansen, March 2008.
+
+    # Collect the defined values of every key in the "keys" option and
+    # write one record per key, KEY_MEDIAN, to the "data_out" stream. The
+    # median comes from Maasha::Calc::median; a key without any values
+    # gets "N/A".
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, %median_hash, $median, $fh );
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } )
+        {
+            next if not defined $record->{ $key };
+
+            push @{ $median_hash{ $key } }, $record->{ $key };
+        }
+
+        if ( not $options->{ "no_stream" } ) {
+            &put_record( $record, $out );
+        }
+    }
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    foreach $key ( @{ $options->{ "keys" } } )
+    {
+        $median = $median_hash{ $key } ? &Maasha::Calc::median( $median_hash{ $key } ) : "N/A";
+
+        &put_record( { $key . "_MEDIAN" => $median }, $fh );
+    }
+
+    close $fh;
+}
+
+
+sub script_max_vals
+{
+    # Martin A. Hansen, February 2008.
+
+    # Determine the maximum values of given keys. A single record holding
+    # KEY_MAX for every key in the "keys" option is written to the
+    # "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, $fh, %max_hash, $max_record );
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } )
+        {
+            # Same scheme as script_min_vals: the first defined value seeds
+            # the maximum, so 0 and negative values are handled correctly.
+            if ( defined $record->{ $key } )
+            {
+                if ( exists $max_hash{ $key } ) {
+                    $max_hash{ $key } = $record->{ $key } if $record->{ $key } > $max_hash{ $key };
+                } else {
+                    $max_hash{ $key } = $record->{ $key };
+                }
+            }
+        }
+
+        &put_record( $record, $out ) if not $options->{ "no_stream" };
+    }
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    foreach $key ( @{ $options->{ "keys" } } )
+    {
+        $max_record->{ $key . "_MAX" } = $max_hash{ $key };
+    }
+
+    &put_record( $max_record, $fh );
+
+    close $fh;
+}
+
+
+sub script_min_vals
+{
+    # Martin A. Hansen, February 2008.
+
+    # Find the smallest defined value of every key in the "keys" option
+    # and write a single record holding KEY_MIN for each of those keys to
+    # the "data_out" stream.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $key, $fh, %min_hash, $min_record );
+
+    while ( $record = &get_record( $in ) )
+    {
+        foreach $key ( @{ $options->{ "keys" } } )
+        {
+            next if not defined $record->{ $key };
+
+            # The first value seen seeds the minimum.
+            if ( not exists $min_hash{ $key } or $record->{ $key } < $min_hash{ $key } ) {
+                $min_hash{ $key } = $record->{ $key };
+            }
+        }
+
+        if ( not $options->{ "no_stream" } ) {
+            &put_record( $record, $out );
+        }
+    }
+
+    $fh = &write_stream( $options->{ "data_out" } );
+
+    $min_record->{ $_ . "_MIN" } = $min_hash{ $_ } foreach @{ $options->{ "keys" } };
+
+    &put_record( $min_record, $fh );
+
+    close $fh;
+}
+
+
+sub script_upload_to_ucsc
+{
+    # Martin A. Hansen, August 2007.
+
+    # Write the records in the stream to a temporary file and hand that
+    # file to the upload routines in Maasha::UCSC. With the "wiggle" option
+    # the records are turned into fixedStep blocks per chromosome and
+    # encoded with wigEncode; otherwise the format (PSL, BED or BED with
+    # secondary structure) is taken from the first record that sets it.
+    # The final upload only happens when the "database" option exists and
+    # the "table" option is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $record, $file, $wib_file, $wig_file, $wib_dir, $fh_in, $fh_out, $i, $first, $format, $args, $type, $columns, $append, %fh_hash,
+         $chr, $beg, $end, $block, $line, $max, $beg_block, $entry, $q_id, $clones );
+
+    $options->{ "short_label" } ||= $options->{ 'table' };
+    $options->{ "long_label" }  ||= $options->{ 'table' };
+    $options->{ "group" }       ||= $ENV{ "LOGNAME" };
+    $options->{ "priority" }    ||= 1;
+    $options->{ "visibility" }  ||= "pack";
+    $options->{ "color" }       ||= join( ",", int( rand( 255 ) ), int( rand( 255 ) ), int( rand( 255 ) ) );
+    $options->{ "chunk_size" }  ||= 10_000_000_000;    # Due to 32-bit UCSC compilation really large tables cannot be loaded in one go.
+
+    $file = "$TMP_DIR/ucsc_upload.tmp";
+
+    $append = 0;
+
+    $first = 1;
+
+    $i = 0;
+
+    if ( $options->{ 'wiggle' } )
+    {
+        $options->{ "visibility" } = "full";
+
+        # Split the records into one temporary BED file per chromosome.
+        while ( $record = &get_record( $in ) )
+        {
+            &put_record( $record, $out ) if not $options->{ "no_stream" };
+
+            $record->{ "CHR" }     = $record->{ "S_ID" }  if not defined $record->{ "CHR" };
+            $record->{ "CHR_BEG" } = $record->{ "S_BEG" } if not defined $record->{ "CHR_BEG" };
+            $record->{ "CHR_END" } = $record->{ "S_END" } if not defined $record->{ "CHR_END" };
+
+            $fh_hash{ $record->{ "CHR" } } = &Maasha::Common::write_open( "$TMP_DIR/$record->{ 'CHR' }" ) if not exists $fh_hash{ $record->{ "CHR" } };
+
+            $fh_out = $fh_hash{ $record->{ "CHR" } };
+
+            &Maasha::UCSC::bed_put_entry( $record, $fh_out, 5 );
+        }
+
+        # Close the filehandles (the hash values) - the keys are only the
+        # chromosome names - so all data is on disk before bedSort runs.
+        close $_ foreach values %fh_hash;
+
+        $fh_out = &Maasha::Common::write_open( $file );
+
+        foreach $chr ( sort keys %fh_hash )
+        {
+            &Maasha::Common::run( "bedSort", "$TMP_DIR/$chr $TMP_DIR/$chr" );
+
+            $fh_in = &Maasha::Common::read_open( "$TMP_DIR/$chr" );
+
+            undef $block;
+
+            while ( $entry = &Maasha::UCSC::bed_get_entry( $fh_in, 5 ) )
+            {
+                $chr  = $entry->{ 'CHR' };
+                $beg  = $entry->{ 'CHR_BEG' };
+                $end  = $entry->{ 'CHR_END' };
+                $q_id = $entry->{ 'Q_ID' };
+
+                # A trailing _<number> on the Q_ID is used as the number of
+                # clones the entry stands for.
+                if ( $q_id =~ /_(\d+)$/ ) {
+                    $clones = $1;
+                } else {
+                    $clones = 1;
+                }
+
+                if ( $block )
+                {
+                    if ( $beg > $max )
+                    {
+                        # The entry lies beyond the current block: emit the
+                        # block and start a new one below.
+                        &Maasha::UCSC::fixedstep_put_entry( $chr, $beg_block, $block, $fh_out );
+                        undef $block;
+                    }
+                    else
+                    {
+                        for ( $i = $beg - $beg_block; $i < ( $beg - $beg_block ) + ( $end - $beg ); $i++ ) {
+                            $block->[ $i ] += $clones;
+                        }
+
+                        $max = &Maasha::Calc::max( $max, $end );
+                    }
+                }
+
+                if ( not $block )
+                {
+                    $beg_block = $beg;
+                    $max       = $end;
+
+                    for ( $i = 0; $i < ( $end - $beg ); $i++ ) {
+                        $block->[ $i ] += $clones;
+                    }
+                }
+            }
+
+            close $fh_in;
+
+            &Maasha::UCSC::fixedstep_put_entry( $chr, $beg_block, $block, $fh_out );
+
+            unlink "$TMP_DIR/$chr";
+        }
+
+        close $fh_out;
+
+        $wig_file = "$options->{ 'table' }.wig";
+        $wib_file = "$options->{ 'table' }.wib";
+
+        $wib_dir = "$ENV{ 'DATA_DIR' }/genomes/$options->{ 'database' }/wib";
+
+        &Maasha::Common::dir_create_if_not_exists( $wib_dir );
+
+        # &Maasha::Common::run( "wigEncode", "$file $wig_file $wib_file > /dev/null 2>&1" );
+
+        # NOTE(review): the table name ends up in a shell command line here
+        # - confirm that it is validated before it reaches this point.
+        `cd $TMP_DIR && wigEncode $file $wig_file $wib_file > /dev/null 2>&1`;
+        &Maasha::Common::run( "mv", "$TMP_DIR/$wib_file $wib_dir" );
+
+        unlink $file;
+
+        $file = $wig_file;
+
+        $format = "WIGGLE";
+    }
+    else
+    {
+        $fh_out = &Maasha::Common::write_open( $file );
+
+        while ( $record = &get_record( $in ) )
+        {
+            &put_record( $record, $out ) if not $options->{ "no_stream" };
+
+            if ( $record->{ "REC_TYPE" } eq "PSL" )
+            {
+                &Maasha::UCSC::psl_put_header( $fh_out ) if $first;
+                &Maasha::UCSC::psl_put_entry( $record, $fh_out );
+
+                $first = 0;
+
+                $format = "PSL" if not $format;
+            }
+            elsif ( $record->{ "REC_TYPE" } eq "BED" and $record->{ "SEC_STRUCT" } )
+            {
+                # chrom chromStart chromEnd name score strand size secStr conf
+
+                print $fh_out join ( "\t",
+                    $record->{ "CHR" },
+                    $record->{ "CHR_BEG" },
+                    $record->{ "CHR_END" } + 1,
+                    $record->{ "Q_ID" },
+                    $record->{ "SCORE" },
+                    $record->{ "STRAND" },
+                    $record->{ "SIZE" },
+                    $record->{ "SEC_STRUCT" },
+                    $record->{ "CONF" },
+                ), "\n";
+
+                $format = "BED_SS" if not $format;
+            }
+            elsif ( $record->{ "REC_TYPE" } eq "BED" )
+            {
+                &Maasha::UCSC::bed_put_entry( $record, $fh_out, $record->{ "BED_COLS" } );
+
+                $format  = "BED" if not $format;
+                $columns = $record->{ "BED_COLS" } if not $columns;
+            }
+            elsif ( $record->{ "REC_TYPE" } eq "PATSCAN" and $record->{ "CHR" } )
+            {
+                &Maasha::UCSC::bed_put_entry( $record, $fh_out, 6 );
+
+                $format  = "BED" if not $format;
+                $columns = 6 if not $columns;
+            }
+            elsif ( $record->{ "REC_TYPE" } eq "BLAST" and $record->{ "S_ID" } =~ /^chr/ )
+            {
+                $record->{ "CHR" }     = $record->{ "S_ID" };
+                $record->{ "CHR_BEG" } = $record->{ "S_BEG" };
+                $record->{ "CHR_END" } = $record->{ "S_END" };
+                $record->{ "SCORE" }   = $record->{ "BIT_SCORE" } * 1000;
+
+                $format  = "BED" if not $format;
+                $columns = 6 if not $columns;
+
+                &Maasha::UCSC::bed_put_entry( $record, $fh_out );
+            }
+            elsif ( $record->{ "REC_TYPE" } eq "VMATCH" and $record->{ "S_ID" } =~ /^chr/i )
+            {
+                $record->{ "CHR" }     = $record->{ "S_ID" };
+                $record->{ "CHR_BEG" } = $record->{ "S_BEG" };
+                $record->{ "CHR_END" } = $record->{ "S_END" };
+                $record->{ "SCORE" }   = $record->{ "SCORE" } || 999;
+                $record->{ "SCORE" }   = int( $record->{ "SCORE" } );
+
+                $format  = "BED" if not $format;
+                $columns = 6 if not $columns;
+
+                &Maasha::UCSC::bed_put_entry( $record, $fh_out, 6 );
+            }
+
+            # Upload what has been written so far and start a new file;
+            # later uploads are made in append mode.
+            if ( $i == $options->{ "chunk_size" } )
+            {
+                close $fh_out;
+
+                if ( $format eq "BED" ) {
+                    &Maasha::UCSC::bed_upload_to_ucsc( $TMP_DIR, $file, $options, $append );
+                } elsif ( $format eq "PSL" ) {
+                    &Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append );
+                }
+
+                unlink $file;
+
+                $first = 1;
+
+                $append = 1;
+
+                $fh_out = &Maasha::Common::write_open( $file );
+            }
+
+            $i++;
+        }
+    }
+
+    close $fh_out;
+
+    if ( exists $options->{ "database" } and $options->{ "table" } )
+    {
+        if ( $format eq "BED" )
+        {
+            $type = "bed $columns";
+
+            &Maasha::UCSC::bed_upload_to_ucsc( $TMP_DIR, $file, $options, $append );
+        }
+        elsif ( $format eq "BED_SS" )
+        {
+            $options->{ "sec_struct" } = 1;
+
+            $type = "sec_struct";
+
+            &Maasha::UCSC::bed_upload_to_ucsc( $TMP_DIR, $file, $options, $append );
+        }
+        elsif ( $format eq "PSL" )
+        {
+            $type = "psl";
+
+            &Maasha::UCSC::psl_upload_to_ucsc( $file, $options, $append );
+        }
+        elsif ( $format eq "WIGGLE" )
+        {
+            $type = "wig 0";
+
+            &Maasha::UCSC::wiggle_upload_to_ucsc( $TMP_DIR, $wib_dir, $file, $options );
+        }
+
+        unlink $file;
+
+        &Maasha::UCSC::update_my_tracks( $options, $type );
+    }
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub read_stream
+{
+    # Martin A. Hansen, July 2007.
+
+    # Opens a stream for reading. When STDIN is not a terminal, STDIN is
+    # used and the path is ignored. Otherwise the file at the given path
+    # is opened. When STDIN is a terminal and no path is given nothing is
+    # opened and no error is raised.
+
+    my ( $path,   # path - OPTIONAL
+       ) = @_;
+
+    # Returns filehandle, or undef when there is nothing to read from.
+
+    my ( $fh );
+
+    $fh = ( not -t STDIN ) ? &Maasha::Common::read_stdin()
+        : $path            ? &Maasha::Common::read_open( $path )
+        :                    undef;
+
+    return $fh;
+}
+
+
+sub write_stream
+{
+    # Martin A. Hansen, August 2007.
+
+    # Opens a stream for writing: the file at the given path when a path
+    # is given, STDOUT otherwise. The gzip flag is passed on unchanged to
+    # Maasha::Common::write_open.
+
+    my ( $path,   # path - OPTIONAL
+         $gzip,   # compress data - OPTIONAL
+       ) = @_;
+
+    # Returns filehandle.
+
+    my ( $fh );
+
+    $fh = $path ? &Maasha::Common::write_open( $path, $gzip ) : &Maasha::Common::write_stdout();
+
+    return $fh;
+}
+
+
+sub get_record
+{
+    # Martin A. Hansen, July 2007.
+
+    # Reads one record at a time and converts that record
+    # to a Perl data structure (a hash) which is returned.
+    # A record is a block of "KEY: VALUE" lines that is
+    # terminated by a line holding "---".
+
+    my ( $fh,   # handle to read from
+       ) = @_;
+
+    # Returns data structure - a hash in list context, a hash reference
+    # in scalar context - or nothing at the end of the stream.
+
+    my ( $block, @lines, $line, $key, $value, %record );
+
+    local $/ = "\n---\n";
+
+    $block = <$fh>;
+
+    # Check for end of stream before touching the block.
+    return if not defined $block;
+
+    chomp $block;
+
+    @lines = split "\n", $block;
+
+    foreach $line ( @lines )
+    {
+        # Split at the first ": " only, so a value that itself contains
+        # ": " is kept in full.
+        ( $key, $value ) = split ": ", $line, 2;
+
+        # An empty value is reported as undef, as it always has been.
+        $value = undef if defined $value and $value eq "";
+
+        $record{ $key } = $value;
+    }
+
+    return wantarray ? %record : \%record;
+}
+
+
+sub put_record
+{
+    # Martin A. Hansen, July 2007.
+
+    # Emits a record (a hash ref) as one "KEY: VALUE" line per key followed
+    # by a "---" line. The record goes to the given filehandle or, without
+    # one, to the default output handle. An empty record emits nothing.
+
+    my ( $data,   # data structure
+         $fh,     # file handle - OPTIONAL
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $key );
+
+    if ( scalar keys %{ $data } )
+    {
+        if ( not $fh )
+        {
+            foreach $key ( keys %{ $data } ) {
+                print "$key: $data->{ $key }\n";
+            }
+
+            print "---\n";
+        }
+        else
+        {
+            foreach $key ( keys %{ $data } ) {
+                print $fh "$key: $data->{ $key }\n";
+            }
+
+            print $fh "---\n";
+        }
+    }
+
+    undef $data;
+}
+
+
+sub getopt_files
+{
+    # Martin A. Hansen, November 2007.
+
+    # Turns an explicit GetOpt::Long argument into a list of files. The
+    # argument is a comma separated list whose elements are either
+    # existing files or patterns containing "*" that are expanded with
+    # glob. Any other element is dropped. E.g.
+    # --data_in=test.fna
+    # --data_in=test.fna,test2.fna
+    # --data_in=*.fna
+    # --data_in=test.fna,/dir/*.fna
+
+    my ( $option,   # option from GetOpt::Long
+       ) = @_;
+
+    # Returns a list, or a list reference in scalar context.
+
+    my ( @files );
+
+    @files = map { ( -f $_ ) ? $_ : ( $_ =~ /\*/ ? glob( $_ ) : () ) } split ",", $option;
+
+    return wantarray ? @files : \@files;
+}
+
+
+sub sig_handler
+{
+    # Martin A. Hansen, April 2008.
+
+    # Reports why the program is stopping, removes the temporary directory
+    # and exits with status 0. Nothing is reported or removed when the
+    # temporary directory does not exist. This subroutine is meant to be
+    # run always as the last thing, even if a script dies or is
+    # interrupted or killed.
+
+    my ( $sig,   # signal from the %SIG
+       ) = @_;
+
+    my ( $message );
+
+    chomp $sig;
+
+    sleep 1;
+
+    exit( 0 ) if not -d $TMP_DIR;
+
+    if ( $sig =~ /MAASHA_ERROR/ ) {
+        $message = "\nProgram '$script' had an error" . " - Please wait for temporary data to be removed\n";
+    } elsif ( $sig eq "INT" ) {
+        $message = "\nProgram '$script' interrupted (ctrl-c was pressed)" . " - Please wait for temporary data to be removed\n";
+    } elsif ( $sig eq "TERM" ) {
+        $message = "\nProgram '$script' terminated (someone used kill?)" . " - Please wait for temporary data to be removed\n";
+    } else {
+        $message = "\nProgram '$script' died->$sig" . " - Please wait for temporary data to be removed\n";
+    }
+
+    print STDERR $message;
+
+    # This is a really bad solution, potentially, anyone can include this module and set
+    # the TMP_DIR to point at any dir and thus take out the machine !!!
+
+    &Maasha::Common::dir_remove( $TMP_DIR );
+
+    exit( 0 );
+}
+
+
+END
+{
+    # Removes the temporary directory when the program ends.
+
+    # This is a really bad solution, potentially, anyone can include this module and set
+    # the TMP_DIR to point at any dir and thus take out the machine !!!
+
+    # Same guard as in sig_handler: only an existing directory is removed.
+    # sig_handler may already have removed it before exiting, and an unset
+    # TMP_DIR must never reach dir_remove.
+    &Maasha::Common::dir_remove( $TMP_DIR ) if defined $TMP_DIR and -d $TMP_DIR;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+1;
+
+__END__
+
+
+sub script_read_soft
+{
+    # Martin A. Hansen, December 2007.
+
+    # Read soft format.
+    # http://www.ncbi.nlm.nih.gov/geo/info/soft2.html
+    # The records already in the stream are passed on first. Then every
+    # file in the "files" option is parsed with Maasha::NCBI::soft_parse
+    # and its records are emitted, stopping after "num" records in total
+    # when that option is set.
+
+    my ( $in,        # handle to in stream
+         $out,       # handle to out stream
+         $options,   # options hash
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $data_in, $file, $num, $records, $record );
+
+    while ( $record = &get_record( $in ) ) {
+        &put_record( $record, $out );
+    }
+
+    $num = 1;
+
+    FILE:
+    foreach $file ( @{ $options->{ "files" } } )
+    {
+        $records = &Maasha::NCBI::soft_parse( $file );
+
+        foreach $record ( @{ $records } )
+        {
+            &put_record( $record, $out );
+
+            # Leave both loops once the requested number has been emitted.
+            last FILE if $options->{ "num" } and $num == $options->{ "num" };
+
+            $num++;
+        }
+    }
+
+    close $data_in if $data_in;
+}
--- /dev/null
+package Maasha::Blast;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines to run BLAST and parse results.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Storable qw( dclone );
+use Data::Dumper;
+use vars qw( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# The blast report is structured like this:
+#
+# for the first entry:
+#
+# 1 - blast program name
+# 2 - blast reference
+# 3 - query sequence name and length
+# 4 - subject database
+# 5 - sequences producing significant alignments
+# 6 - one or more HSP for each subject sequence
+# 7 - blast statistics
+#
+# for subsequent entries:
+#
+# 3 - query sequence name and length
+# 5 - sequences producing significant alignments
+# 6 - one or more HSP for each subject sequence
+# 7 - blast statistics
+#
+# ________________________
+#
+# info
+# query
+# params
+# subject
+# hit1
+# hsp1
+# hsp2
+# hsp3
+# hit2
+# hsp1
+# hit3
+# hsp1
+# hsp2
+# stats
+# ________________________
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub xml_parse_blast
+{
+    # Martin A. Hansen, March 2007.
+
+    # Reads up to the DOCTYPE line of a BLAST XML report, decides from
+    # that line whether the report comes from NCBI blast or from blastcl3,
+    # and parses the rest of the report with the matching routine. Dies
+    # when the DOCTYPE line is missing or not recognized.
+
+    my ( $fh,   # file handle to BLAST file in XML format
+       ) = @_;
+
+    # returns list, or a list reference in scalar context
+
+    my ( $results, $line, $doctype, %parser_for, $parser );
+
+    # doctype line => [ progress message, parsing routine ]
+    %parser_for = (
+        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">'
+            => [ qq(Parsing blastcl3 results ...\n), \&xml_parse_blast_blastcl3 ],
+        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">'
+            => [ qq(Parsing NCBI blast results ...\n), \&xml_parse_blast_ncbi ],
+    );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $line !~ /^<!DOCTYPE/;
+
+        $doctype = $line;
+
+        last;
+    }
+
+    $parser = $parser_for{ $doctype };
+
+    die qq(ERROR: Could not determine doctype\n) if not $parser;
+
+    print STDERR $parser->[ 0 ];
+
+    $results = $parser->[ 1 ]->( $fh );
+
+    return wantarray ? @{ $results } : $results;
+}
+
+
+sub xml_parse_blast_ncbi
+{
+    # Martin A. Hansen, February 2007.
+
+    # Parses NCBI blast XML records. Lines holding <Iteration_query-ID>,
+    # <Iteration_query-def> or <Iteration_query-len> are collected as the
+    # query; once a query has been seen, all following lines are collected
+    # as the subject until </Iteration_hits> closes the iteration, at which
+    # point one result is emitted and the collectors are emptied.
+
+    my ( $fh,   # file handle to BLAST results in XML format
+       ) = @_;
+
+    # Returns a list of { QUERY => ..., SUBJECT => ... } hashes in list
+    # context, otherwise a reference to that list.
+
+    my ( @query_lines, @subject_lines, @results );
+
+    while ( 1 )
+    {
+        my $record = xml_get_blast_record( $fh );
+
+        last if not $record or not scalar @{ $record } > 0;
+
+        foreach my $row ( @{ $record } )
+        {
+            if ( $row =~ /<Iteration_query-ID>|<Iteration_query-def>|<Iteration_query-len>/ )
+            {
+                push @query_lines, $row;
+
+                next;
+            }
+
+            next if not @query_lines;   # nothing is collected before the first query line
+
+            push @subject_lines, $row;
+
+            next if $row !~ /<\/Iteration_hits>/;
+
+            my $query   = xml_parse_blast_query( \@query_lines );
+            my $subject = xml_parse_blast_subject( \@subject_lines );
+
+            push @results, { "QUERY" => $query, "SUBJECT" => $subject };
+
+            @query_lines   = ();
+            @subject_lines = ();
+        }
+    }
+
+    return @results if wantarray;
+
+    return \@results;
+}
+
+
+sub xml_parse_blast_blastcl3
+{
+    # Martin A. Hansen, February 2007.
+
+    # Parses blastcl3 XML records. Lines holding <BlastOutput_query-ID>,
+    # <BlastOutput_query-def> or <BlastOutput_query-len> are collected as
+    # the query; once a query has been seen, all following lines are
+    # collected as the subject until </Iteration_hits> is met, at which
+    # point one result is emitted and the collectors are emptied.
+
+    my ( $fh,   # file handle to BLAST results in XML format
+       ) = @_;
+
+    # Returns a list of { QUERY => ..., SUBJECT => ... } hashes in list
+    # context, otherwise a reference to that list.
+
+    my ( @query_lines, @subject_lines, @results );
+
+    while ( 1 )
+    {
+        my $record = xml_get_blast_record( $fh );
+
+        last if not $record or not scalar @{ $record } > 0;
+
+        foreach my $row ( @{ $record } )
+        {
+            if ( $row =~ /<BlastOutput_query-ID>|<BlastOutput_query-def>|<BlastOutput_query-len>/ )
+            {
+                push @query_lines, $row;
+
+                next;
+            }
+
+            next if not @query_lines;   # nothing is collected before the first query line
+
+            push @subject_lines, $row;
+
+            next if $row !~ /<\/Iteration_hits>/;
+
+            my $query   = xml_parse_blast_query( \@query_lines );
+            my $subject = xml_parse_blast_subject( \@subject_lines );
+
+            push @results, { "QUERY" => $query, "SUBJECT" => $subject };
+
+            @query_lines   = ();
+            @subject_lines = ();
+        }
+    }
+
+    return @results if wantarray;
+
+    return \@results;
+}
+
+
+sub xml_get_blast_record
+{
+    # Martin A. Hansen, March 2007.
+
+    # Reads chomped lines from the stream up to and including the first line
+    # containing </BlastOutput>, or to end of stream if no such line is met.
+
+    my ( $fh,   # file handle to BLAST file in XML format
+       ) = @_;
+
+    # Returns the lines as a list in list context, otherwise as a list
+    # reference. The list is empty when the stream is exhausted.
+
+    my @record;
+
+    while ( 1 )
+    {
+        my $row = <$fh>;
+
+        last if not defined $row;
+
+        chomp $row;
+
+        push @record, $row;
+
+        last if $row =~ /<\/BlastOutput>/;
+    }
+
+    return @record if wantarray;
+
+    return \@record;
+}
+
+
+sub xml_parse_blast_query
+{
+    # Extracts the query id, definition and length from a list of XML lines.
+    #
+    # The NCBI parser in this file collects <Iteration_query-*> lines while
+    # the blastcl3 parser collects <BlastOutput_query-*> lines, and both
+    # hand their lines to this routine, so both spellings are recognized
+    # here. Lines matching none of the tags are ignored.
+
+    my ( $lines,   # list of XML lines describing the query
+       ) = @_;
+
+    # Returns a hash with any of the keys Q_ID, Q_DEF and Q_LEN in list
+    # context, otherwise a reference to that hash.
+
+    my ( $line, %hash );
+
+    foreach $line ( @{ $lines } )
+    {
+        if ( $line =~ /<(?:Iteration|BlastOutput)_query-ID>([^<]+)/ ) {
+            $hash{ "Q_ID" } = $1;
+        } elsif ( $line =~ /<(?:Iteration|BlastOutput)_query-def>([^<]+)/ ) {
+            $hash{ "Q_DEF" } = $1;
+        } elsif ( $line =~ /<(?:Iteration|BlastOutput)_query-len>([^<]+)/ ) {
+            $hash{ "Q_LEN" } = $1;
+        }
+    }
+
+    return wantarray ? %hash : \%hash;
+}
+
+
+sub xml_parse_blast_subject
+{
+    # Martin A. Hansen, March 2007.
+
+    # Splits the subject lines of one iteration into hits. Lines holding
+    # <Hit_id>, <Hit_def>, <Hit_accession>, <Hit_len> or <Hit_num> describe
+    # the hit itself; once such a line has been seen, all other lines are
+    # collected as HSP lines until </Hit_hsps> closes the hit.
+
+    my ( $lines,   # list of XML lines describing the subject
+       ) = @_;
+
+    # Returns a list of hit hashes, each with its parsed HSPs under the key
+    # HSPS, in list context, otherwise a reference to that list.
+
+    my ( @hit_lines, @hsp_lines, @hits );
+
+    foreach my $row ( @{ $lines } )
+    {
+        if ( $row =~ /<Hit_id>|<Hit_def>|<Hit_accession>|<Hit_len>|<Hit_num>/ )
+        {
+            push @hit_lines, $row;
+
+            next;
+        }
+
+        next if not @hit_lines;   # nothing is collected before the first hit line
+
+        push @hsp_lines, $row;
+
+        next if $row !~ /<\/Hit_hsps>/;
+
+        my $hit  = xml_parse_blast_hit( \@hit_lines );
+        my $hsps = xml_parse_blast_hsps( \@hsp_lines );
+
+        $hit->{ "HSPS" } = $hsps;
+
+        push @hits, $hit;
+
+        @hit_lines = ();
+        @hsp_lines = ();
+    }
+
+    return @hits if wantarray;
+
+    return \@hits;
+}
+
+
+sub xml_parse_blast_hit
+{
+    # Extracts hit number, id, definition, accession and length from a list
+    # of XML lines. Each line is tested against the tags in the order given
+    # in the table below and only the first matching tag is used.
+
+    my ( $lines,   # list of XML lines describing the hit
+       ) = @_;
+
+    # Returns a hash with any of the keys S_NUM, S_ID, S_DEF, S_ACC and
+    # S_LEN in list context, otherwise a reference to that hash.
+
+    my ( @fields, %hit );
+
+    @fields = (
+        [ "S_NUM", qr/<Hit_num>([^<]+)/ ],
+        [ "S_ID",  qr/<Hit_id>([^<]+)/ ],
+        [ "S_DEF", qr/<Hit_def>([^<]+)/ ],
+        [ "S_ACC", qr/<Hit_accession>([^<]+)/ ],
+        [ "S_LEN", qr/<Hit_len>([^<]+)/ ],
+    );
+
+    foreach my $row ( @{ $lines } )
+    {
+        foreach my $field ( @fields )
+        {
+            my ( $key, $regex ) = @{ $field };
+
+            next if $row !~ $regex;
+
+            $hit{ $key } = $1;
+
+            last;
+        }
+    }
+
+    return %hit if wantarray;
+
+    return \%hit;
+}
+
+
+sub xml_parse_blast_hsps
+{
+    # Martin A. Hansen, March 2007.
+
+    # Parses the HSP lines of one hit. Fields are accumulated until a </Hsp>
+    # line is met, at which point a copy of the accumulated fields is stored
+    # as one HSP and the accumulator is emptied, so that a field missing
+    # from one HSP is not inherited from the previous HSP.
+    #
+    # The four begin/end positions are decremented by one.
+
+    my ( $blast_hits,   # list of XML lines describing the HSPs of a hit
+       ) = @_;
+
+    # Returns a list of HSP hashes in list context, otherwise a reference
+    # to that list.
+
+    my ( $line, %hash, @hsps );
+
+    foreach $line ( @{ $blast_hits } )
+    {
+        if ( $line =~ /<Hsp_num>([^<]+)/ ) {
+            $hash{ "NUM" } = $1;
+        } elsif ( $line =~ /<Hsp_evalue>([^<]+)/ ) {
+            $hash{ "E_VAL" } = $1;
+        } elsif ( $line =~ /<Hsp_query-from>([^<]+)/ ) {
+            $hash{ "Q_BEG" } = $1 - 1;
+        } elsif ( $line =~ /<Hsp_query-to>([^<]+)/ ) {
+            $hash{ "Q_END" } = $1 - 1;
+        } elsif ( $line =~ /<Hsp_hit-from>([^<]+)/ ) {
+            $hash{ "S_BEG" } = $1 - 1;
+        } elsif ( $line =~ /<Hsp_hit-to>([^<]+)/ ) {
+            $hash{ "S_END" } = $1 - 1;
+        } elsif ( $line =~ /<Hsp_query-frame>([^<]+)/ ) {
+            $hash{ "Q_FRAME" } = $1;
+        } elsif ( $line =~ /<Hsp_hit-frame>([^<]+)/ ) {
+            $hash{ "S_FRAME" } = $1;
+        } elsif ( $line =~ /<Hsp_qseq>([^<]+)/ ) {
+            $hash{ "Q_ALIGN" } = $1;
+        } elsif ( $line =~ /<Hsp_hseq>([^<]+)/ ) {
+            $hash{ "S_ALIGN" } = $1;
+        } elsif ( $line =~ /<Hsp_midline>([^<]+)/ ) {
+            $hash{ "MIDLINE" } = $1;
+        } elsif ( $line =~ /<\/Hsp>/ ) {
+            # All values are plain scalars so a shallow copy is sufficient.
+            push @hsps, { %hash };
+
+            %hash = ();
+        }
+    }
+
+    return wantarray ? @hsps : \@hsps;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+package Maasha::Calc;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# This module contains subroutines for simple algebra.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Storable qw( dclone );
+use vars qw ( @ISA @EXPORT );
+use Exporter;
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub is_a_number
+{
+    # Tests if a string looks like a decimal number, optionally signed and
+    # optionally with a fraction and/or an exponent.
+    # The pattern is taken from perldoc -q 'is a number'.
+
+    my ( $str,   # string to test
+       ) = @_;
+
+    # Returns 1 if the string is a number, otherwise 0.
+
+    return ( $str =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/ ) ? 1 : 0;
+}
+
+
+sub dist_point2line
+{
+    # Martin A. Hansen, June 2004.
+
+    # Calculates the distance from a point to the (infinite) line running
+    # through two given sets of coordinates.
+    #
+    # The distance is calculated from the general form of the line rather
+    # than from its slope, so vertical lines (x1 == x2) do not cause a
+    # division by zero. If the two line coordinates are identical no line is
+    # defined and the distance to that single point is returned instead.
+
+    my ( $px,   # point x coordinate
+         $py,   # point y coordinate
+         $x1,   # line 1 x coordinate
+         $y1,   # line 1 y coordinate
+         $x2,   # line 2 x coordinate
+         $y2,   # line 2 y coordinate
+       ) = @_;
+
+    # returns float
+
+    my ( $dx, $dy, $len );
+
+    $dx  = $x2 - $x1;
+    $dy  = $y2 - $y1;
+    $len = sqrt( $dx ** 2 + $dy ** 2 );
+
+    if ( $len == 0 ) {
+        return sqrt( ( $px - $x1 ) ** 2 + ( $py - $y1 ) ** 2 );
+    }
+
+    return abs( $dy * $px - $dx * $py + $x2 * $y1 - $y2 * $x1 ) / $len;
+}
+
+
+sub dist_point2point
+{
+    # Martin A. Hansen, April 2004.
+
+    # Calculates the Euclidean distance between two sets of coordinates.
+
+    my ( $x1,   # first point x coordinate
+         $y1,   # first point y coordinate
+         $x2,   # second point x coordinate
+         $y2,   # second point y coordinate
+       ) = @_;
+
+    # returns float
+
+    my $x_diff = $x2 - $x1;
+    my $y_diff = $y2 - $y1;
+
+    return sqrt( $x_diff ** 2 + $y_diff ** 2 );
+}
+
+
+sub dist_interval
+{
+    # Martin A. Hansen, February 2008.
+
+    # Calculates the gap between two intervals, i.e. the difference between
+    # the begin of the rightmost interval and the end of the leftmost one.
+    # Intervals that overlap or touch have distance 0.
+
+    my ( $beg1,   # begin of first interval
+         $end1,   # end of first interval
+         $beg2,   # begin of second interval
+         $end2,   # end of second interval
+       ) = @_;
+
+    # Returns number
+
+    return $beg2 - $end1 if $beg2 > $end1;
+    return $beg1 - $end2 if $beg1 > $end2;
+
+    return 0;
+}
+
+
+sub mean
+{
+    # Martin A. Hansen, April 2007
+
+    # Calculates the arithmetic mean of a list of numbers: the sum of the
+    # numbers divided by the number of elements in the list.
+
+    my ( $numbers,   # list of numbers
+       ) = @_;
+
+    # returns decimal number
+
+    my $total = 0;
+
+    foreach my $number ( @{ $numbers } ) {
+        $total += $number;
+    }
+
+    return $total / scalar @{ $numbers };
+}
+
+
+sub median
+{
+    # Martin A. Hansen, January 2008
+
+    # Given a list of numbers, calculates and returns the median: the middle
+    # element of the sorted list for an odd number of elements, and the mean
+    # of the two middle elements for an even number of elements.
+    #
+    # NB the given list is sorted numerically in place.
+
+    my ( $numbers,   # list of numbers
+       ) = @_;
+
+    # returns decimal number
+
+    my ( $num, $mid );
+
+    @{ $numbers } = sort { $a <=> $b } @{ $numbers };
+
+    $num = scalar @{ $numbers };
+    $mid = int( $num / 2 );
+
+    # With 0-based indexing the two middle elements of an even sized list
+    # are found at $mid - 1 and $mid.
+    if ( $num % 2 == 0 ) {
+        return ( $numbers->[ $mid - 1 ] + $numbers->[ $mid ] ) / 2;
+    } else {
+        return $numbers->[ $mid ];
+    }
+}
+
+
+sub min
+{
+    # Martin A. Hansen, August 2006.
+
+    # Picks the smallest of two given numbers. The first number is
+    # returned when the two are equal.
+
+    my ( $x,   # first number
+         $y,   # second number
+       ) = @_;
+
+    # Returns number
+
+    return $x <= $y ? $x : $y;
+}
+
+sub max
+{
+    # Martin A. Hansen, November 2006.
+
+    # Picks the largest of two given numbers. The second number is
+    # returned when the two are equal.
+
+    my ( $x,   # first number
+         $y,   # second number
+       ) = @_;
+
+    # Returns number
+
+    return $x > $y ? $x : $y;
+}
+
+
+sub minmax
+{
+    # Martin A. Hansen, April 2007.
+
+    # Finds the smallest and the largest number of a list in a single pass,
+    # starting out with the first element as both.
+
+    my ( $list,   # list of numbers
+       ) = @_;
+
+    # Returns ( min, max ) in list context, otherwise a reference to that
+    # tuple.
+
+    my ( $lowest, $highest ) = ( $list->[ 0 ], $list->[ 0 ] );
+
+    for my $value ( @{ $list } )
+    {
+        if ( $value < $lowest ) {
+            $lowest = $value;
+        }
+
+        if ( $value > $highest ) {
+            $highest = $value;
+        }
+    }
+
+    return ( $lowest, $highest ) if wantarray;
+
+    return [ $lowest, $highest ];
+}
+
+
+sub list_max
+{
+    # Martin A. Hansen, August 2007.
+
+    # Scans a list for its largest number, starting out with the first
+    # element as candidate.
+
+    my ( $list,   # list of numbers
+       ) = @_;
+
+    # Returns float
+
+    my $largest = $list->[ 0 ];
+
+    for my $candidate ( @{ $list } )
+    {
+        next unless $candidate > $largest;
+
+        $largest = $candidate;
+    }
+
+    return $largest;
+}
+
+
+sub list_min
+{
+    # Martin A. Hansen, August 2007.
+
+    # Scans a list for its smallest number, starting out with the first
+    # element as candidate.
+
+    my ( $list,   # list of numbers
+       ) = @_;
+
+    # Returns float
+
+    my $smallest = $list->[ 0 ];
+
+    for my $candidate ( @{ $list } )
+    {
+        next unless $candidate < $smallest;
+
+        $smallest = $candidate;
+    }
+
+    return $smallest;
+}
+
+
+sub sum
+{
+    # Martin A. Hansen, April 2007.
+
+    # Adds up all numbers in a given list. The sum of an empty list is 0.
+
+    my ( $list,   # list of numbers
+       ) = @_;
+
+    # returns float
+
+    my $total = 0;
+
+    foreach my $number ( @{ $list } ) {
+        $total += $number;
+    }
+
+    return $total;
+}
+
+
+sub overlap
+{
+    # Martin A. Hansen, November 2003.
+
+    # Tests if two intervals overlap. The begin and end positions of an
+    # interval are swapped if given in reverse order. Intervals sharing
+    # only an end position count as overlapping.
+
+    my ( $beg1,   # begin of first interval
+         $end1,   # end of first interval
+         $beg2,   # begin of second interval
+         $end2,   # end of second interval
+       ) = @_;
+
+    # Returns 1 if overlapping, otherwise 0.
+
+    ( $beg1, $end1 ) = ( $end1, $beg1 ) if $beg1 > $end1;
+    ( $beg2, $end2 ) = ( $end2, $beg2 ) if $beg2 > $end2;
+
+    return ( $end1 < $beg2 or $beg1 > $end2 ) ? 0 : 1;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+package Maasha::Common;
+
+
+# Copyright (C) 2006-2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# This module contains commonly used routines
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Carp;
+use Data::Dumper;
+use Storable;
+use IO::File;
+use Maasha::Config;
+
+use Exporter;
+
+use vars qw( @ISA @EXPORT @EXPORT_OK );
+
+@ISA = qw( Exporter ) ;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub error
+{
+    # Martin A. Hansen, February 2008.
+
+    # Prints an error message to STDERR, optionally followed by a stack
+    # trace table (routine, file and line) built from Carp::longmess, and
+    # then dies with "MAASHA_ERROR".
+    #
+    # The error message is printed before the stack trace is parsed, and
+    # stack trace lines in an unrecognized format are skipped, so that a
+    # problem with the stack trace never hides the real error message.
+    # A period terminating a trace line is accepted, since newer versions
+    # of Carp terminate messages with a period.
+
+    my ( $msg,        # Error message.
+         $no_stack,   # disable stack trace - OPTIONAL
+       ) = @_;
+
+    # Returns nothing - always dies.
+
+    my ( $script, $trace, $line, @table, $row, $routine_max, $file_max );
+
+    chomp $msg;
+
+    $msg =~ s/\.$//;
+
+    $script = &get_scriptname();
+
+    print STDERR qq(\nERROR!\n\nProgram \'$script\' failed: $msg.\n\n);
+
+    die( "MAASHA_ERROR" ) if $no_stack;
+
+    push @table, [ "Routine", "File", "Line" ];
+    push @table, [ "-------", "----", "----" ];
+
+    $trace = &Carp::longmess();
+
+    foreach $line ( split "\n", $trace )
+    {
+        if ( $line =~ /^ at (.+) line (\d+)\.?\s*$/ ) {
+            push @table, [ "", $1, $2 ];             # location of the call to this routine
+        } elsif ( $line =~ /^\s*(.+) called at (.+) line (\d+)\.?\s*$/ ) {
+            push @table, [ $1, $2, $3 ];             # one frame of the call stack
+        }
+    }
+
+    # The two first columns are padded to the widest entry plus 3 spaces.
+
+    $routine_max = 0;
+    $file_max    = 0;
+
+    foreach $row ( @table )
+    {
+        $routine_max = length( $row->[ 0 ] ) if length( $row->[ 0 ] ) > $routine_max;
+        $file_max    = length( $row->[ 1 ] ) if length( $row->[ 1 ] ) > $file_max;
+    }
+
+    $routine_max += 3;
+    $file_max    += 3;
+
+    foreach $row ( @table ) {
+        printf( STDERR "%-${routine_max}s%-${file_max}s%s\n", @{ $row } );
+    }
+
+    print STDERR "\n";
+
+    die( "MAASHA_ERROR" );
+}
+
+
+sub read_open
+{
+    # Martin A. Hansen, January 2004.
+
+    # Read opens a file and returns a filehandle. Files starting with the
+    # gzip magic bytes (0x1f 0x8b) are read through a zcat pipe.
+    #
+    # The file type is determined by inspecting the first two bytes of the
+    # file and zcat is started with a list form open, so the path is never
+    # interpolated into a shell command line. Paths containing whitespace
+    # or shell metacharacters are thus handled safely.
+
+    my ( $path,   # full path to file
+       ) = @_;
+
+    # returns filehandle
+
+    my ( $fh, $probe, $magic, $is_gzip );
+
+    $is_gzip = 0;
+
+    # Only regular files are probed - probing e.g. a FIFO would consume data.
+    if ( defined $path and -f $path and open( $probe, "<", $path ) )
+    {
+        binmode $probe;
+
+        if ( ( read( $probe, $magic, 2 ) || 0 ) == 2 and $magic eq "\x1f\x8b" ) {
+            $is_gzip = 1;
+        }
+
+        close $probe;
+    }
+
+    if ( $is_gzip )
+    {
+        $fh = IO::File->new;
+
+        open( $fh, "-|", "zcat", $path ) or &Maasha::Common::error( qq(Could not read-open file "$path": $!) );
+    }
+    else
+    {
+        $fh = IO::File->new( $path, "r" ) or &Maasha::Common::error( qq(Could not read-open file "$path": $!) );
+    }
+
+    return $fh;
+}
+
+
+sub write_open
+{
+    # Martin A. Hansen, January 2004.
+
+    # Write opens a file and returns a filehandle. If the gzip flag is set,
+    # the handle is a pipe to gzip whose output is redirected to the file.
+
+    my ( $path,   # full path to file
+         $gzip,   # flag if data is to be gzipped - OPTIONAL
+       ) = @_;
+
+    # returns filehandle
+
+    my $fh = $gzip
+           ? IO::File->new( "|gzip -f>$path" )
+           : IO::File->new( $path, "w" );
+
+    &Maasha::Common::error( qq(Could not write-open file "$path": $!) ) if not $fh;
+
+    return $fh;
+}
+
+
+sub append_open
+{
+    # Martin A. Hansen, February 2006.
+
+    # Opens a file in append mode and returns a filehandle.
+
+    my ( $path,   # path to file
+       ) = @_;
+
+    # returns filehandle
+
+    my $fh = IO::File->new( $path, "a" );
+
+    &Maasha::Common::error( qq(Could not append-open file "$path": $!) ) if not $fh;
+
+    return $fh;
+}
+
+
+sub pipe_open
+{
+    # Martin A. Hansen, January 2007.
+
+    # Opens "-" through IO::File and returns the resulting filehandle.
+
+    my $fh = IO::File->new( "-" );
+
+    &Maasha::Common::error( qq(Could not open pipe: $!) ) unless $fh;
+
+    return $fh;
+}
+
+
+sub read_stdin
+{
+    # Martin A. Hansen, July 2007.
+
+    # Duplicates STDIN for reading and returns the new filehandle.
+
+    my $fh = IO::File->new( "<&STDIN" );
+
+    &Maasha::Common::error( qq(Could not read from STDIN: $!) ) unless $fh;
+
+    return $fh;
+}
+
+
+sub write_stdout
+{
+    # Martin A. Hansen, July 2007.
+
+    # Duplicates STDOUT for writing and returns the new filehandle.
+
+    my $fh = IO::File->new( ">&STDOUT" );
+
+    &Maasha::Common::error( qq(Could not write to STDOUT: $!) ) unless $fh;
+
+    return $fh;
+}
+
+
+sub file_store
+{
+    # Martin A. Hansen, December 2004.
+
+    # Serializes a data structure to a file using Storable.
+
+    my ( $path,   # full path to file
+         $data,   # data structure (a reference)
+       ) = @_;
+
+    # Returns the true value from Storable::store on success.
+
+    my $stored = Storable::store( $data, $path );
+
+    &Maasha::Common::error( qq(Could not write-open file "$path": $!) ) if not $stored;
+
+    return $stored;
+}
+
+
+sub file_retrieve
+{
+    # Martin A. Hansen, December 2004.
+
+    # Retrieves a data structure stored in a file with Storable.
+
+    use Scalar::Util ();
+
+    my ( $path,   # full path to data file
+       ) = @_;
+
+    # In scalar context the retrieved reference is returned as is. In list
+    # context a hash reference is flattened to a hash and an array reference
+    # to a list, while any other kind of reference is returned as is.
+
+    my ( $data, $type );
+
+    $data = &Storable::retrieve( $path ) or &Maasha::Common::error( qq(Could not read-open file "$path": $!) );
+
+    return $data if not wantarray;
+
+    # reftype looks through blessing, so hash based objects are still
+    # flattened to a hash.
+    $type = Scalar::Util::reftype( $data ) || "";
+
+    if ( $type eq "HASH" ) {
+        return %{ $data };
+    } elsif ( $type eq "ARRAY" ) {
+        return @{ $data };
+    } else {
+        return $data;
+    }
+}
+
+
+sub dir_create
+{
+    # Martin A. Hansen, July 2007.
+
+    # Creates a directory. It is an error if the directory already exists.
+
+    my ( $path,   # full path to dir
+       ) = @_;
+
+    # Returns nothing.
+
+    &Maasha::Common::error( qq(Directory already exists "$path": $!) ) if -d $path;
+
+    mkdir $path or &Maasha::Common::error( qq(Could not create directory "$path": $!) );
+}
+
+
+sub dir_create_if_not_exists
+{
+    # Martin A. Hansen, May 2008.
+
+    # Creates a directory unless a directory of that name is already present.
+
+    my ( $path,   # full path to dir
+       ) = @_;
+
+    # Returns nothing.
+
+    ( mkdir $path or &Maasha::Common::error( qq(Could not create directory "$path": $!) ) ) if not -d $path;
+}
+
+
+sub dir_remove
+{
+    # Martin A. Hansen, April 2008.
+
+    # Removes a directory recursively. Nothing is done if the given path is
+    # not a directory.
+    #
+    # The removal is done with File::Path::rmtree instead of a shell command
+    # line, so paths containing whitespace or shell metacharacters cannot
+    # cause anything but the given directory to be removed.
+
+    use File::Path ();
+
+    my ( $path,   # directory
+       ) = @_;
+
+    # Returns nothing.
+
+    return if not -d $path;
+
+    File::Path::rmtree( $path );
+
+    &Maasha::Common::error( qq(Could not remove directory "$path") ) if -d $path;
+
+    return;
+}
+
+
+sub ls_dirs
+{
+    # Martin A. Hansen, June 2007.
+
+    # Returns all directories found in a given directory. Entries are not
+    # filtered beyond the directory test, so the special entries "." and
+    # ".." are included when the directory handle reports them.
+
+    my ( $path,   # full path to directory
+       ) = @_;
+
+    # Returns a list of directory names, each prefixed with the given path,
+    # in list context, otherwise a reference to that list.
+
+    my ( $dh, @dirs );
+
+    $dh = &open_dir( $path );
+
+    @dirs = grep { -d "$path/$_" } &read_dir( $dh );
+
+    # The handle is an IO::Dir object and is closed through its own close
+    # method - the builtin close only operates on filehandles.
+    $dh->close;
+
+    @dirs = map { "$path/$_" } @dirs;
+
+    return wantarray ? @dirs : \@dirs;
+}
+
+
+sub ls_files
+{
+    # Martin A. Hansen, June 2007.
+
+    # Returns all plain files found in a given directory.
+
+    my ( $path,   # full path to directory
+       ) = @_;
+
+    # Returns a list of filenames, each prefixed with the given path, in
+    # list context, otherwise a reference to that list.
+
+    my ( $dh, @files );
+
+    $dh = &open_dir( $path );
+
+    @files = grep { -f "$path/$_" } &read_dir( $dh );
+
+    # The handle is an IO::Dir object and is closed through its own close
+    # method - the builtin close only operates on filehandles.
+    $dh->close;
+
+    @files = map { "$path/$_" } @files;
+
+    return wantarray ? @files : \@files;
+}
+
+
+sub open_dir
+{
+    # Martin A. Hansen, June 2007.
+
+    # Opens a directory through IO::Dir and returns the directory handle
+    # object.
+
+    use IO::Dir;
+
+    my ( $path,   # full path to directory
+       ) = @_;
+
+    # returns object
+
+    my $dh = IO::Dir->new( $path );
+
+    &Maasha::Common::error( qq(Could not open dir "$path": $!) ) if not $dh;
+
+    return $dh;
+}
+
+
+sub read_dir
+{
+    # Martin A. Hansen, June 2007.
+
+    # Reads all entries from a directory handle by calling its read method
+    # until that returns an undefined value.
+
+    my ( $dh,   # directory handle object
+       ) = @_;
+
+    # Returns a list of entries in list context, otherwise a list reference.
+
+    my @entries;
+
+    while ( 1 )
+    {
+        my $entry = $dh->read;
+
+        last if not defined $entry;
+
+        push @entries, $entry;
+    }
+
+    return @entries if wantarray;
+
+    return \@entries;
+}
+
+
+sub read_args
+{
+    # Martin A. Hansen, December 2006
+
+    # Parses a strictly formatted list of arguments. Three kinds of
+    # arguments are accepted:
+    #    1) options with value     [--option=value]
+    #    2) options without value  [--option] - these get the value 1
+    #    3) names of existing directories and files, which are collected
+    #       under the keys DIRS and FILES, respectively.
+    # Anything else is an error. If a list of accepted arguments is given,
+    # any option not in that list is an error too.
+
+    my ( $args,      # list of arguments
+         $ok_args,   # list of accepted arguments - OPTIONAL
+       ) = @_;
+
+    # Returns a hash in list context, otherwise a hash reference.
+
+    my ( %parsed, @dir_args, @file_args, %allowed );
+
+    foreach my $arg ( @{ $args } )
+    {
+        if ( $arg =~ /^--([^=]+)=(.+)$/ )
+        {
+            $parsed{ $1 } = $2;
+
+            next;
+        }
+
+        if ( $arg =~ /^--(.+)$/ )
+        {
+            $parsed{ $1 } = 1;
+
+            next;
+        }
+
+        if ( -d $arg )
+        {
+            push @dir_args, $arg;
+
+            next;
+        }
+
+        if ( -f $arg )
+        {
+            push @file_args, $arg;
+
+            next;
+        }
+
+        &Maasha::Common::error( qq(Bad syntax in argument->"$arg") );
+    }
+
+    $parsed{ "DIRS" }  = \@dir_args;
+    $parsed{ "FILES" } = \@file_args;
+
+    if ( $ok_args )
+    {
+        %allowed = map { $_ => 1 } @{ $ok_args }, "DIRS", "FILES";
+
+        foreach ( keys %parsed ) {
+            &Maasha::Common::error( qq(Unknown argument->"$_") ) if not exists $allowed{ $_ };
+        }
+    }
+
+    return %parsed if wantarray;
+
+    return \%parsed;
+}
+
+
+sub get_sessionid
+{
+    # Martin A. Hansen, April 2008.
+
+    # Builds a session id by concatenating the current epoch time and the
+    # process id.
+
+    # Returns a number
+
+    my $session_id = join "", time, $$;
+
+    return $session_id;
+}
+
+
+sub get_tmpdir
+{
+    # Martin A. Hansen, April 2008.
+
+    # Creates a temporary directory below $ENV{ 'TMP_DIR' }, named after
+    # the user ($ENV{ 'USER' } with any periods removed) and a session id.
+    # It is an error if $ENV{ 'TMP_DIR' } is not an existing directory.
+
+    # Returns the path to the created directory.
+
+    &Maasha::Common::error( qq(no TMP_DIR set in %ENV) ) if not -d $ENV{ 'TMP_DIR' };
+
+    ( my $user = $ENV{ 'USER' } ) =~ s/\.//g;
+
+    my $path = $ENV{ 'TMP_DIR' } . "/" . $user . "_" . &Maasha::Common::get_sessionid();
+
+    &Maasha::Common::dir_create( $path );
+
+    return $path;
+}
+
+
+sub get_scriptname
+{
+    # Martin A. Hansen, February 2007
+
+    # Returns the name of the running script, which is the last element of
+    # $0 when split on "/".
+
+    my @path_elems = split "/", $0;
+
+    return ( @path_elems )[ -1 ];
+}
+
+
+sub get_basename
+{
+    # Martin A. Hansen, February 2007
+
+    # Given a full path to a file returns the basename, which is the part
+    # of the file name before the last '.'. File names without a '.', or
+    # with a '.' as first character only, are returned unaltered.
+
+    my ( $path,   # full path to filename
+       ) = @_;
+
+    # Returns a string.
+
+    my ( $basename );
+
+    $basename = ( split "/", $path )[ -1 ];
+
+    # The greedy (.+) backtracks to the last '.' only, and requires at
+    # least one character to precede it.
+    $basename =~ s/(.+)\..*$/$1/;
+
+    return $basename;
+}
+
+
+sub file_read
+{
+    # Martin A. Hansen, December 2004.
+
+    # Reads a block of a given length from a file, starting at a given
+    # position counted from the beginning of the file. A negative length
+    # is an error.
+
+    my ( $fh,    # file handle to file
+         $beg,   # read start in file
+         $len,   # read length of block
+       ) = @_;
+
+    # returns string
+
+    &Maasha::Common::error( qq(Negative length: $len) ) if $len < 0;
+
+    my $block;
+
+    sysseek( $fh, $beg, 0 );      # whence 0: position is relative to file start
+    sysread( $fh, $block, $len );
+
+    return $block;
+}
+
+
+sub file_size
+{
+    # Martin A. Hansen, March 2007
+
+    # Looks up the size of a given file, which is element 7 of the list
+    # returned by stat.
+
+    my ( $path,   # full path to file
+       ) = @_;
+
+    # returns integer
+
+    my @stat_fields = stat( $path );
+
+    return $stat_fields[ 7 ];
+}
+
+
+sub run
+{
+    # Martin A. Hansen, April 2007.
+
+    # Runs an executable with optional arguments through system. The
+    # executable is looked up with Maasha::Config::get_exe, and is run
+    # with "nice -n19" if the nice flag is set.
+
+    my ( $exe,    # executable to run
+         $args,   # argument string
+         $nice,   # nice flag
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( @parts, $command_line );
+
+    @parts = ( scalar &Maasha::Config::get_exe( $exe ) );
+
+    push    @parts, $args       if $args;
+    unshift @parts, "nice -n19" if $nice;
+
+    $command_line = join " ", @parts;
+
+    system( $command_line ) == 0 or &Maasha::Common::error( qq(Could not execute "$command_line": $?) );
+}
+
+
+sub run_and_return
+{
+    # Martin A. Hansen, April 2008.
+
+    # Runs an executable with optional arguments and captures its output.
+    # The executable is looked up with Maasha::Config::get_exe, and is run
+    # with "nice -n19" if the nice flag is set.
+
+    my ( $exe,    # executable to run
+         $args,   # argument string
+         $nice,   # nice flag
+       ) = @_;
+
+    # Returns the chomped output lines as a list in list context, otherwise
+    # as a list reference.
+
+    my ( @parts, $command_line, @output );
+
+    @parts = ( scalar &Maasha::Config::get_exe( $exe ) );
+
+    push    @parts, $args       if $args;
+    unshift @parts, "nice -n19" if $nice;
+
+    $command_line = join " ", @parts;
+
+    @output = `$command_line`;
+
+    chomp @output;
+
+    return @output if wantarray;
+
+    return \@output;
+}
+
+
+sub time_stamp
+{
+    # Martin A. Hansen, February 2006.
+
+    # Returns a timestamp for use in log files.
+    # Format: YYYY-MM-DD HH:MM:SS
+    #
+    # Date and time of day are both taken from one and the same call to
+    # localtime, so that the two always agree (taking the date from gmtime
+    # and the time of day from localtime yields a wrong date around
+    # midnight in time zones other than UTC).
+
+    # returns string
+
+    my ( $sec, $min, $hour, $day, $mon, $year );
+
+    ( $sec, $min, $hour, $day, $mon, $year ) = localtime( time );
+
+    $mon  += 1;      # first month is 0, so we correct accordingly
+    $year += 1900;   # years are counted from 1900
+
+    return sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year, $mon, $day, $hour, $min, $sec );
+}
+
+
+sub wrap_line
+{
+    # Martin A. Hansen, May 2005
+
+    # Takes a given line and wraps it to a given width without breaking any
+    # words, i.e. lines are broken at the last space within the width, and
+    # that space is left out.
+    #
+    # A word longer than the width cannot be kept intact and is broken
+    # after exactly width characters. A remainder that fits within the
+    # width is kept as one line.
+
+    my ( $line,    # line to wrap
+         $width,   # wrap width
+       ) = @_;
+
+    # Returns a list of lines in list context, otherwise a list reference.
+
+    my ( @lines, $pos, $len, $chunk, $break );
+
+    $width = 1 if not $width or $width < 1;   # guarantees that every round moves forward
+
+    $pos = 0;
+    $len = length $line;
+
+    while ( $pos < $len )
+    {
+        if ( $len - $pos <= $width )
+        {
+            push @lines, substr( $line, $pos );
+
+            last;
+        }
+
+        $chunk = substr $line, $pos, $width;
+        $break = rindex $chunk, " ";
+
+        if ( $break < 0 )
+        {
+            # No space to break at - break the word at the width.
+            push @lines, $chunk;
+
+            $pos += $width;
+        }
+        else
+        {
+            push @lines, substr( $chunk, 0, $break );
+
+            $pos += $break + 1;   # continue after the space
+        }
+    }
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+1;
--- /dev/null
+package Maasha::Config;
+
+# Copyright (C) 2006 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# This module contains configuration details for the usual system setup, etc.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use vars qw( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> GLOBALS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# File scoped copies of the environment variables used by the routines in
+# this module. They are read once, when the module is loaded.
+my ( $HOME, $PATH, $DATA_DIR, $TMP_DIR, $INST_DIR );
+
+$HOME = $ENV{ "HOME" };
+$PATH = $ENV{ "PATH" };
+$DATA_DIR = $ENV{ "DATA_DIR" };
+$TMP_DIR = $ENV{ "TMP_DIR" };
+$INST_DIR = $ENV{ "INST_DIR" };
+
+# A missing environment variable is not fatal at load time - a warning is
+# emitted for each variable that is not set.
+warn qq(WARNING: HOME not set in env\n) if not defined $HOME;
+warn qq(WARNING: PATH not set in env\n) if not defined $PATH;
+warn qq(WARNING: DATA_DIR not set in env\n) if not defined $DATA_DIR;
+warn qq(WARNING: TMP_DIR not set in env\n) if not defined $TMP_DIR;
+warn qq(WARNING: INST_DIR not set in env\n) if not defined $INST_DIR;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_exe
+{
+    # Martin A. Hansen, April 2007.
+
+    # Searches the directories of the PATH, in order, for a given
+    # executable. Directories carrying the name of the executable are
+    # disregarded. It is an error if the executable is not found.
+
+    my ( $exe,   # name of executable
+       ) = @_;
+
+    # Returns the full path to the first executable found.
+
+    my @candidates = map { "$_/$exe" } split /:/, $PATH;
+
+    foreach my $candidate ( @candidates )
+    {
+        next if not -x $candidate;
+        next if -d $candidate;
+
+        return $candidate;
+    }
+
+    &Maasha::Common::error( qq(Could not find executable \'$exe\') );
+}
+
+
+sub genome_fasta
+{
+    # Martin A. Hansen, November 2007.
+
+    # Locates the FASTA file for a given genome below the genomes
+    # directory of $DATA_DIR. It is an error if the file does not exist.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns the full path as a string.
+
+    my $genome_file = join "/", $DATA_DIR, "genomes", $genome, "$genome.fna";
+
+    &Maasha::Common::error( qq(Genome file "$genome_file" for genome "$genome" not found) ) if not -f $genome_file;
+
+    return $genome_file;
+}
+
+
+sub genome_fasta_index
+{
+    # Martin A. Hansen, December 2007.
+
+    # Locates the index of the FASTA file for a given genome below the
+    # genomes directory of $DATA_DIR. It is an error if the index file
+    # does not exist.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns the full path as a string.
+
+    my $index = join "/", $DATA_DIR, "genomes", $genome, "$genome.fna.index";
+
+    &Maasha::Common::error( qq(Index file "$index" for genome -> $genome not found) ) if not -f $index;
+
+    return $index;
+}
+
+
+sub genome_blast
+{
+    # Martin A. Hansen, November 2007.
+
+    # Builds the path to the BLAST database for a given genome below the
+    # genomes directory of $DATA_DIR. The path is not checked.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns string.
+
+    return join "/", $DATA_DIR, "genomes", $genome, "blast", "$genome.fna";
+}
+
+
+sub genome_blat_ooc
+{
+    # Martin A. Hansen, November 2007.
+
+    # Locates the BLAT ooc file of a given tile size for a given genome
+    # below the genomes directory of $DATA_DIR. It is an error if the file
+    # does not exist.
+
+    my ( $genome,      # requested genome
+         $tile_size,   # blat tile size
+       ) = @_;
+
+    # Returns the full path as a string.
+
+    my $ooc_file = join "/", $DATA_DIR, "genomes", $genome, "blat", "$tile_size.ooc";
+
+    if ( not -f $ooc_file ) {
+        &Maasha::Common::error( qq(ooc file "$ooc_file" not found for genome -> $genome) );
+    }
+
+    return $ooc_file;
+}
+
+
+sub genome_vmatch
+{
+    # Martin A. Hansen, November 2007.
+
+    # Builds the Vmatch index names for a given genome: one name per
+    # chromosome entry of the genome, each located in the vmatch directory
+    # of the genome. The existence of the indexes is not checked.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns a list in list context, otherwise a list reference.
+
+    my @indexes = map { "$DATA_DIR/genomes/$genome/vmatch/$_" } &chromosomes( $genome );
+
+    # needs robustness check
+
+    return @indexes if wantarray;
+
+    return \@indexes;
+}
+
+
+sub genome_phastcons
+{
+    # Martin A. Hansen, January 2008.
+
+    # Builds the path to the concatenated PhastCons file for a given
+    # genome below the genomes directory of $DATA_DIR. The path is not
+    # checked.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns a string.
+
+    return join "/", $DATA_DIR, "genomes", $genome, "phastcons", "$genome.pp";
+}
+
+
+sub genome_phastcons_index
+{
+    # Martin A. Hansen, January 2008.
+
+    # Builds the path to the PhastCons index for a given genome below the
+    # genomes directory of $DATA_DIR. The path is not checked.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns a string.
+
+    return join "/", $DATA_DIR, "genomes", $genome, "phastcons", "$genome.pp.index";
+}
+
+
+sub genomes
+{
+    # Martin A. Hansen, February 2008.
+
+    # Collects the available genomes from the genomes.conf file, where the
+    # genome name is the first tab separated field of each line. Lines
+    # consisting of "//" only are skipped.
+
+    # Returns a sorted list without duplicates in list context, otherwise
+    # a reference to that list.
+
+    my ( %seen, $fh, @genomes );
+
+    $fh = &Maasha::Common::read_open( "$INST_DIR/conf/genomes.conf" );
+
+    while ( my $row = <$fh> )
+    {
+        chomp $row;
+
+        next if $row eq "//";
+
+        my ( $org ) = split "\t", $row;
+
+        $seen{ $org } = 1;
+    }
+
+    close $fh;
+
+    @genomes = sort keys %seen;
+
+    return @genomes if wantarray;
+
+    return \@genomes;
+}
+
+
+sub chromosomes
+{
+    # Martin A. Hansen, November 2007.
+
+    # Looks up the chromosome files of a given genome in the genomes.conf
+    # file, where each line holds a genome name and a chromosome file
+    # separated by a tab. Lines consisting of "//" only are skipped.
+    # It is an error if the genome is not found in the file.
+
+    my ( $genome,   # requested genome
+       ) = @_;
+
+    # Returns a list, in file order, in list context, otherwise a reference
+    # to that list.
+
+    my ( %chrs_of, $fh, @chrs );
+
+    $fh = &Maasha::Common::read_open( "$INST_DIR/conf/genomes.conf" );
+
+    while ( my $row = <$fh> )
+    {
+        chomp $row;
+
+        next if $row eq "//";
+
+        my ( $org, $chr ) = split "\t", $row;
+
+        push @{ $chrs_of{ $org } }, $chr;
+    }
+
+    close $fh;
+
+    &Maasha::Common::error( qq(Genome -> $genome not found in genome hash) ) if not exists $chrs_of{ $genome };
+
+    @chrs = @{ $chrs_of{ $genome } };
+
+    return @chrs if wantarray;
+
+    return \@chrs;
+}
+
+
+sub maf_track
+{
+    # Martin A. Hansen, April 2008.
+
+    # Given a genome returns the corresponding mafTrack database table name.
+    # It is an error if no table name is known for the genome.
+
+    my ( $genome,   # genome to lookup.
+       ) = @_;
+
+    # Returns a string.
+
+    my ( %hash );
+
+    # The hash below should be in a config file - fix later.
+
+    %hash = (
+        danRer4 => 'multiz7way',
+        dm2     => 'multiz15way',
+        dm3     => 'multiz15way',
+        fr2     => 'multiz7way',
+        galGal3 => 'multiz7way',
+        gasAcu1 => 'multiz7way',
+        hg18    => 'multiz17way',
+        mm8     => 'multiz17way',
+        mm9     => 'multiz17way',
+        oryLat1 => 'multiz7way',
+        panTro2 => 'multiz17way',
+        tetNig1 => 'multiz7way',
+    );
+
+    # The message must be one string: the error routine takes the message
+    # as first argument and a flag disabling the stack trace as second.
+    &Maasha::Common::error( qq(multiz track not found) ) if not exists $hash{ $genome };
+
+    return $hash{ $genome };
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+1;
--- /dev/null
+package Maasha::DumpFunc;
+
+# Copyright (C) 2003 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines to inspect objects and their inheritance.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Class::Inspector;
+use vars qw ( @ISA @EXPORT );
+use Exporter;
+
+use Data::Dumper;
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub dump_func
+{
+    # Martin A. Hansen, August 2003.
+
+    # Given an object, asks Class::Inspector for the 'full' names of the
+    # 'public' methods of the object's class (as given by ref).
+
+    my ( $obj,   # incoming object
+       ) = @_;
+
+    # Returns a list in list context, otherwise whatever Class::Inspector
+    # returned (normally a list reference).
+
+    my ( $ref, $methods );
+
+    $ref     = ref $obj;
+    $methods = Class::Inspector->methods( $ref, 'full', 'public' );
+
+#    @{ $methods } = grep /$ref/, @{ $methods };
+
+    # The lookup can come back without a list (e.g. $obj was not an
+    # object); dereferencing that would die under strict, so list context
+    # gets an empty list. Scalar context is passed through unchanged.
+
+    if ( wantarray ) {
+        return $methods ? @{ $methods } : ();
+    }
+
+    return $methods;
+}
+
+
+sub dump_test
+{
+    # Martin A. Hansen, August 2003.
+
+    # Given an object, calls each method reported by dump_func without
+    # arguments and prints one "TESTING ..." line to STDOUT for every call
+    # that returns a true value. Calls that die or return false are skipped.
+
+    my ( $obj ,   # incoming object
+       ) = @_;
+
+    # Returns a list of test lines. NOTE: nothing is ever added to this
+    # list (the push below is commented out), so it is always empty.
+
+    my ( $methods, $method, $function, $result, @lines );
+
+    $methods = dump_func( $obj );
+
+    foreach $method ( @{ $methods } )
+    {
+        # Skip names without a package qualifier; otherwise $1 would hold
+        # a stale capture from an earlier match.
+
+        next if not $method =~ /::(\w+)$/;
+
+        $function = $1;
+
+        # Call the method once, inside eval, and reuse the result. Calling
+        # it a second time outside eval could die or repeat side effects.
+
+        $result = eval { $obj->$function };
+
+        next if not $result;
+
+        # push @lines, "Testing $function from $method --- Returns -> " . $result;
+        print "TESTING $function FROM $method: RETURNS->" . $result . "\n";
+    }
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::EMBL;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines to parse EMBL records.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Storable qw( dclone );
+use Maasha::Common;
+use Maasha::Fasta;
+use Maasha::Calc;
+use Maasha::Seq;
+use vars qw ( @ISA @EXPORT );
+
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_embl_entry
+{
+    # Martin A. Hansen, June 2006.
+
+    # Given a filehandle to an embl file,
+    # fetches the next embl entry, and returns
+    # this as a string with multiple lines.
+    # Entries are delimited by a "//" line.
+
+    my ( $fh,   # filehandle to embl file
+       ) = @_;
+
+    # Returns string, or undef when the filehandle is exhausted.
+
+    my ( $entry );
+
+    # local restores the record separator on return, so reads elsewhere
+    # in the program keep their own line semantics.
+
+    local $/ = "//\n";
+
+    $entry = <$fh>;
+
+    return $entry;
+}
+
+
+sub parse_embl_entry
+{
+    # Martin A. Hansen, June 2006.
+
+    # Splits an embl entry into lines and collects the values of the
+    # two-letter line codes in a hash. If the argument hash has a "keys"
+    # entry, only the codes listed there are collected (and sequence
+    # lines only if "SEQ" is listed); otherwise everything is collected.
+    # Repeated codes are joined with a space, except FT lines which are
+    # joined with a newline and afterwards handed to parse_feature_table.
+
+    my ( $entry,   # embl entry
+         $args,    # argument hash
+       ) = @_;
+
+    # returns data structure
+
+    my ( %hash, $seq, $filter, $line, $code, $value );
+
+    $filter = exists $args->{ "keys" };
+
+    foreach $line ( split "\n", $entry )
+    {
+        if ( $line =~ /^(\w{2})\s+(.*)/ and ( not $filter or exists $args->{ "keys" }->{ $1 } ) )
+        {
+            ( $code, $value ) = ( $1, $2 );
+
+            if ( not exists $hash{ $code } ) {
+                $hash{ $code } = $value;
+            } elsif ( $code eq "FT" ) {
+                $hash{ $code } .= "\n" . $value;
+            } else {
+                $hash{ $code } .= " " . $value;
+            }
+        }
+        elsif ( $line =~ /^\s+(.*)\s+\d+$/ and ( not $filter or exists $args->{ "keys" }->{ "SEQ" } ) )
+        {
+            # sequence line: indented residues followed by a running count
+
+            $seq .= $1;
+        }
+    }
+
+    if ( $seq )
+    {
+        $seq =~ tr/ //d;
+        $hash{ "SEQ" } = $seq;
+    }
+
+    if ( exists $hash{ "FT" } )
+    {
+        $seq =~ tr/ //d;
+        $hash{ "FT" } = &parse_feature_table( $hash{ "FT" }, $seq, $args );
+    }
+
+    return %hash if wantarray;
+
+    return \%hash;
+}
+
+
+sub parse_feature_table
+{
+    # Martin A. Hansen, June 2006.
+
+    # parses the feature table of a EMBL/GenBank/DDBJ entry.
+    # parsing takes place in 4 steps. 1) the feature key is
+    # located. 2) the locator is located taking into consideration
+    # that it may be split over multiple lines, which is dealt with
+    # by counting the parentheses that always are present in multiline
+    # locators. 3) the locator is used to fetch the corresponding
+    # sequence. 4) qualifier key/value pairs are located again taking
+    # into consideration multiline values, which are dealt with by
+    # keeping track of the "-count (value-less qualifiers are also
+    # included). only feature keys and qualifiers defined in the
+    # argument hash are returned.
+    #
+    # Each stored feature is a hash of lists holding the pseudo
+    # qualifiers "_locator" and "_seq" plus the parsed qualifiers.
+
+    my ( $ft,     # feature table
+         $seq,    # entry sequence
+         $args,   # argument hash
+       ) = @_;
+
+    # returns data structure
+
+    my ( @lines, $key_regex, $i, $p, $q, %key_hash, $key, $locator, %qual_hash, $qual_name, $qual_val, $subseq );
+
+    @lines = split "\n", $ft;
+
+    $key_regex = "[A-Za-z0-9_']+";   # this regex should match every possible feature key (gene, misc_feature, 5'UTR ...)
+
+    $i = 0;
+
+    # Iterate by index instead of by truth of the line, so an empty line
+    # does not end the parsing prematurely.
+
+    while ( $i < @lines )
+    {
+        # $p is the number of lines consumed by this feature. Starting at 1
+        # guarantees progress for lines that do not open a feature.
+
+        $p = 1;
+
+        if ( $lines[ $i ] =~ /^($key_regex)\s+(.+)/ )
+        {
+            $key     = $1;
+            $locator = $2;
+
+            undef %qual_hash;
+
+            # ---- getting locator
+
+            # Append continuation lines until the parentheses balance; stop
+            # at the end of the table so a malformed locator cannot loop forever.
+
+            while ( not &balance_params( $locator ) and defined $lines[ $i + $p ] )
+            {
+                $locator .= $lines[ $i + $p ];
+                $p++;
+            }
+
+            push @{ $qual_hash{ "_locator" } }, $locator;
+
+            # ---- getting subsequence
+
+            $subseq = &parse_locator( $locator, $seq );
+
+            push @{ $qual_hash{ "_seq" } }, $subseq;
+
+            # ----- getting qualifiers
+
+            while ( defined( $lines[ $i + $p ] ) and not $lines[ $i + $p ] =~ /^$key_regex/ )
+            {
+                # $q is the number of lines consumed by this qualifier. It is
+                # reset here so a line that is not a qualifier skips exactly
+                # one line instead of reusing a stale count.
+
+                $q = 1;
+
+                if ( $lines[ $i + $p ] =~ /^\// )
+                {
+                    if ( $lines[ $i + $p ] =~ /^\/([^=]+)=(.*)$/ )
+                    {
+                        $qual_name = $1;
+                        $qual_val  = $2;
+                    }
+                    elsif ( $lines[ $i + $p ] =~ /^\/(.*)$/ )
+                    {
+                        $qual_name = $1;
+                        $qual_val  = "";
+                    }
+
+                    # ----- getting qualifier value
+
+                    # Append continuation lines until the double quotes
+                    # balance, bounded by the end of the table.
+
+                    while ( not &balance_quotes( $qual_val ) and defined $lines[ $i + $p + $q ] )
+                    {
+                        $qual_val .= " " . $lines[ $i + $p + $q ];
+                        $q++;
+                    }
+
+                    $qual_val =~ s/^"(.*)"$/$1/;
+                    $qual_val =~ tr/ //d if $qual_name =~ /translation/i;
+
+                    if ( exists $args->{ "quals" } ) {
+                        push @{ $qual_hash{ $qual_name } }, $qual_val if exists $args->{ "quals" }->{ $qual_name };
+                    } else {
+                        push @{ $qual_hash{ $qual_name } }, $qual_val;
+                    }
+                }
+
+                $p += $q;
+            }
+
+            if ( scalar keys %qual_hash > 0 )
+            {
+                # dclone because %qual_hash is reused for the next feature
+
+                if ( exists $args->{ "feats" } ) {
+                    push @{ $key_hash{ $key } }, dclone \%qual_hash if exists $args->{ "feats" }->{ $key };
+                } else {
+                    push @{ $key_hash{ $key } }, dclone \%qual_hash;
+                }
+            }
+        }
+
+        $i += $p;
+    }
+
+    return wantarray ? %key_hash : \%key_hash;
+}
+
+
+sub parse_locator
+{
+    # Martin A. Hansen, June 2006.
+
+    # uses recursion to parse a locator string from a feature
+    # table and fetches the appropriate subsequence. the operators
+    # join(), complement(), and order() are handled.
+    # the locator string is broken into a comma separated list. if the
+    # parentheses of the first element do not balance, the outermost
+    # operator is stripped from the whole locator and the remainder is
+    # parsed recursively. otherwise each element is handled in turn:
+    # operators are stripped and recursed into, and plain ranges are
+    # cut from the sequence and appended to the subsequence.
+    # SNP locators are also dealt with (single positions).
+    #
+    # NOTE(review): with complement(join(...)) every range is reverse
+    # complemented on its own and appended in the listed order; the order
+    # of the pieces is not reversed - confirm whether that is intended.
+
+    my ( $locator,   # locator string
+         $seq,       # nucleotide sequence
+         $subseq,    # subsequence being joined
+         $join,      # join sequences
+         $comp,      # complement sequence or not
+         $order,     # order sequences
+       ) = @_;
+
+    # returns string
+
+    my ( @intervals, $interval, $beg, $end, $newseq );
+
+    @intervals = split ",", $locator;
+
+    if ( not &balance_params( $intervals[ 0 ] ) )   # locator includes a join/comp/order of several ranges
+    {
+        if ( $locator =~ /^join\((.*)\)$/ ) {
+            $subseq = &parse_locator( $1, $seq, $subseq, 1, $comp, $order );
+        } elsif ( $locator =~ /^complement\((.*)\)$/ ) {
+            $subseq = &parse_locator( $1, $seq, $subseq, $join, 1, $order );
+        } elsif ( $locator =~ /^order\((.*)\)$/ ) {
+            $subseq = &parse_locator( $1, $seq, $subseq, $join, $comp, 1 );
+        }
+    }
+    else
+    {
+        foreach $interval ( @intervals )
+        {
+            # An operator applies to its own element only, so the flag is
+            # handed to the recursive call without altering $join, $comp
+            # or $order for the elements that follow in this list.
+
+            if ( $interval =~ /^join\((.*)\)$/ )
+            {
+                $subseq = &parse_locator( $1, $seq, $subseq, 1, $comp, $order );
+            }
+            elsif ( $interval =~ /^complement\((.*)\)$/ )
+            {
+                $subseq = &parse_locator( $1, $seq, $subseq, $join, 1, $order );
+            }
+            elsif ( $interval =~ /^order\((.*)\)$/ )
+            {
+                $subseq = &parse_locator( $1, $seq, $subseq, $join, $comp, 1 );
+            }
+            elsif ( $interval =~ /^[<>]?(\d+)[^\d]+(\d+)$/ )
+            {
+                # range: positions in the locator are 1-based and inclusive
+
+                $beg = $1;
+                $end = $2;
+
+                $newseq = substr $seq, $beg - 1, $end - $beg + 1;
+
+                $newseq = &Maasha::Seq::dna_revcomp( $newseq ) if $comp;
+
+                if ( $order ) {
+                    $subseq .= " " . $newseq;
+                } else {
+                    $subseq .= $newseq;
+                }
+            }
+            elsif ( $interval =~ /^(\d+)$/ )
+            {
+                # single position
+
+                $beg = $1;
+
+                $newseq = substr $seq, $beg - 1, 1 ;
+
+                $newseq = &Maasha::Seq::dna_revcomp( $newseq ) if $comp;
+
+                if ( $order ) {
+                    $subseq .= " " . $newseq;
+                } else {
+                    $subseq .= $newseq;
+                }
+            }
+            else
+            {
+                warn qq(WARNING: Could not match locator -> $locator\n);
+                # die qq(ERROR: Could not match locator -> $locator\n);
+                $subseq .= "";
+            }
+        }
+    }
+
+    return $subseq;
+}
+
+
+sub balance_params
+{
+    # Martin A. Hansen, June 2006.
+
+    # Compares the number of "(" in a string with the number of ")".
+    # Only the counts are compared, not the nesting order.
+
+    my ( $string,   # string to check
+       ) = @_;
+
+    # returns boolean: 1 if the counts are equal, else 0.
+
+    my ( $left, $right );
+
+    $left  = ( $string =~ tr/(// );
+    $right = ( $string =~ tr/)// );
+
+    return $left == $right ? 1 : 0;
+}
+
+
+sub balance_quotes
+{
+    # Martin A. Hansen, June 2006.
+
+    # Counts the double quotes in a string and reports whether
+    # that count is even.
+
+    my ( $string,   # string to check
+       ) = @_;
+
+    # returns boolean: 1 for an even count, else 0.
+
+    my ( $odd );
+
+    $odd = ( $string =~ tr/"// ) % 2;
+
+    return 0 if $odd;
+
+    return 1;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::Fasta;
+
+# Copyright (C) 2006 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for manipulation of FASTA files and FASTA entries.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Maasha::Common;
+use Maasha::Seq;
+use vars qw ( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+use constant {
+ HEAD => 0,
+ SEQ => 1,
+};
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub fasta_format_ok
+{
+    # Martin A. Hansen, March 2007.
+
+    # Checks if a given FASTA file is formatted with
+    # one header per line and one sequence per line.
+    # The check is that every other line, starting with the first,
+    # begins with ">". The lines in between are not inspected.
+
+    my ( $path,   # full path to FASTA file
+       ) = @_;
+
+    # Returns boolean: 1 if the check passes, else 0.
+
+    my ( $fh, $line, $count, $ok );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    $count = 0;
+    $ok    = 1;
+
+    while ( $line = <$fh> )
+    {
+        if ( $count % 2 == 0 and substr( $line, 0, 1 ) ne ">" )
+        {
+            # leave the loop instead of returning, so the file handle
+            # is closed on this path too
+
+            $ok = 0;
+
+            last;
+        }
+
+        $count++;
+    }
+
+    close $fh;
+
+    return $ok;
+}
+
+
+sub get_entries
+{
+    # Martin A. Hansen, December 2006.
+
+    # Reads FASTA entries from a file with get_entry and collects them,
+    # optionally stopping once a given number of entries has been read.
+
+    my ( $path,    # full path to FASTA file
+         $count,   # number of sequences to read - OPTIONAL
+       ) = @_;
+
+    # returns list of tuples
+
+    my ( $fh, $tuple, @tuples );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    while ( $tuple = &get_entry( $fh ) )
+    {
+        push @tuples, $tuple;
+
+        last if $count and $count == @tuples;
+    }
+
+    close $fh;
+
+    return @tuples if wantarray;
+
+    return \@tuples;
+}
+
+
+sub put_entries
+{
+    # Martin A. Hansen, March 2004.
+
+    # Writes a list of FASTA entries with put_entry. If a path is given
+    # the entries go to that file, otherwise put_entry is called without
+    # a file handle.
+
+    my ( $entries,   # list of fasta entries
+         $path,      # full path to file - OPTIONAL
+         $wrap,      # line width - OPTIONAL
+       ) = @_;
+
+    my ( $fh, $entry );
+
+    $fh = &Maasha::Common::write_open( $path ) if $path;
+
+    foreach $entry ( @{ $entries } ) {
+        &put_entry( $entry, $fh, $wrap );
+    }
+
+    # A bare "defined" would test $_ rather than the file handle.
+
+    close $fh if defined $fh;
+}
+
+
+sub wrap
+{
+    # Martin A. Hansen, June 2007
+
+    # Hands a reference to the sequence of a FASTA entry, together with
+    # the wrap length, to Maasha::Seq::wrap.
+
+    my ( $entry,   # FASTA entry
+         $wrap,    # wrap length
+       ) = @_;
+
+    # No explicit return value.
+
+    my ( $seq_ref );
+
+    $seq_ref = \$entry->[ SEQ ];
+
+    &Maasha::Seq::wrap( $seq_ref, $wrap );
+}
+
+
+sub get_entry
+{
+    # Martin A. Hansen, January 2007.
+
+    # Given a filehandle to an FASTA file,
+    # fetches the next FASTA entry, and returns
+    # this as a tuple of [ header, sequence ].
+    # Spaces, tabs and newlines are removed from the sequence.
+
+    my ( $fh,   # filehandle to FASTA file
+       ) = @_;
+
+    # Returns a tuple (list in list context, list reference otherwise),
+    # or nothing when there are no more entries.
+
+    my ( $block, $seq_name, $seq, $entry );
+
+    {
+        # Read one record at a time, where a record ends just before
+        # the ">" of the next header.
+
+        local $/ = "\n>";
+
+        while ( $block = <$fh> )
+        {
+            chomp $block;
+
+            # Skip empty and whitespace-only blocks, e.g. blank lines
+            # ahead of the first header.
+
+            last if $block !~ /^\s*$/;
+        }
+    }
+
+    return if not defined $block;
+
+    if ( $block =~ />?([^\n]+)\n(.*)/s )
+    {
+        # header line, followed by everything after its newline
+
+        $seq_name = $1;
+        $seq      = $2;
+    }
+    else
+    {
+        # A header with no sequence line after it. Without this branch
+        # the captures of an earlier match would be reused.
+
+        $seq_name = $block;
+        $seq_name =~ s/^>//;
+        $seq      = "";
+    }
+
+    $seq =~ tr/ \t\n//d;
+
+    $entry = [ $seq_name, $seq ];
+
+    return wantarray ? @{ $entry } : $entry;
+}
+
+
+sub put_entry
+{
+    # Martin A. Hansen, January 2007.
+
+    # Prints one FASTA entry as a ">header" line followed by a sequence
+    # line, to the given file handle or, without one, with a plain print.
+    # An entry lacking a defined header or sequence is reported through
+    # Maasha::Common::error. If a wrap width is given the entry is first
+    # passed to Maasha::Fasta::wrap.
+
+    my ( $entry,   # a FASTA entry
+         $fh,      # file handle to output file - OPTIONAL
+         $wrap,    # line width - OPTIONAL
+       ) = @_;
+
+    # No explicit return value.
+
+    my ( $text );
+
+    &Maasha::Common::error( qq(FASTA entry has no header) )   if not defined $entry->[ HEAD ];
+    &Maasha::Common::error( qq(FASTA entry has no sequence) ) if not defined $entry->[ SEQ ];
+
+    &Maasha::Fasta::wrap( $entry, $wrap ) if $wrap;
+
+    $text = ">$entry->[ HEAD ]\n$entry->[ SEQ ]\n";
+
+    if ( defined $fh ) {
+        print $fh $text;
+    } else {
+        print $text;
+    }
+}
+
+
+sub find_shortest
+{
+    # Martin A. Hansen, June 2007.
+
+    # Given a stack of FASTA entries, find and return
+    # the shortest entry. If several entries share the
+    # shortest length the first of these is returned.
+
+    my ( $entries,   # list of FASTA entries
+       ) = @_;
+
+    # returns tuple, or nothing if the list is empty.
+
+    my ( $min, $len, $entry, $min_entry );
+
+    foreach $entry ( @{ $entries } )
+    {
+        $len = length $entry->[ SEQ ];
+        $len = 0 if not defined $len;
+
+        # The first entry seeds the minimum, so no artificial upper
+        # bound on the sequence length is needed.
+
+        if ( not defined $min or $len < $min )
+        {
+            $min_entry = $entry;
+            $min       = $len;
+        }
+    }
+
+    # An empty list has no shortest entry; dereferencing undef
+    # below would die in list context.
+
+    return if not defined $min_entry;
+
+    return wantarray ? @{ $min_entry } : $min_entry;
+}
+
+
+sub find_longest
+{
+    # Martin A. Hansen, June 2007.
+
+    # Given a stack of FASTA entries, find and return
+    # the longest entry. If several entries share the
+    # longest length the first of these is returned.
+
+    my ( $entries,   # list of FASTA entries
+       ) = @_;
+
+    # returns tuple, or nothing if the list is empty.
+
+    my ( $max, $len, $entry, $max_entry );
+
+    foreach $entry ( @{ $entries } )
+    {
+        $len = length $entry->[ SEQ ];
+        $len = 0 if not defined $len;
+
+        # The first entry seeds the maximum, so a list holding only
+        # empty sequences still yields an entry.
+
+        if ( not defined $max or $len > $max )
+        {
+            $max_entry = $entry;
+            $max       = $len;
+        }
+    }
+
+    # An empty list has no longest entry; dereferencing undef
+    # below would die in list context.
+
+    return if not defined $max_entry;
+
+    return wantarray ? @{ $max_entry } : $max_entry;
+}
+
+
+sub fasta_get_headers
+{
+    # Martin A. Hansen, May 2007.
+
+    # Reads every entry of a FASTA file with get_entry
+    # and collects the header of each.
+
+    my ( $path,   # full path to FASTA file
+       ) = @_;
+
+    # returns list
+
+    my ( $fh, $tuple, @heads );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    push @heads, $tuple->[ HEAD ] while $tuple = &get_entry( $fh );
+
+    close $fh;
+
+    return @heads if wantarray;
+
+    return \@heads;
+}
+
+
+sub fasta_reformat
+{
+    # Martin A. Hansen, December 2004.
+
+    # Given a file of one or more FASTA entries, reformats these so
+    # each entry consists of one line with header and one line with sequence.
+    # The entries are written to "<path>.temp", which then replaces
+    # the original file.
+
+    my ( $path,   # full path to file with multiple FASTA entries
+       ) = @_;
+
+    my ( $fh_in, $fh_out, $entry );
+
+    $fh_in  = &Maasha::Common::read_open( $path );
+    $fh_out = &Maasha::Common::write_open( "$path.temp" );
+
+    while ( $entry = &get_entry( $fh_in ) ) {
+        &put_entry( $entry, $fh_out );
+    }
+
+    close $fh_in;
+    close $fh_out;
+
+    # Report a failed rename; otherwise the original file would stay
+    # unformatted without any notice.
+
+    rename( "$path.temp", $path ) or &Maasha::Common::error( qq(Could not rename $path.temp to $path: $!) );
+}
+
+
+sub index_create
+{
+    # Martin A. Hansen, December 2004.
+
+    # Given a FASTA file formatted with one line of header and one line of sequence,
+    # builds a list whose first element is the string "FILE_SIZE=<size>" (the
+    # size of the indexed file, kept for checking purposes), followed by one
+    # [ header, seq beg, seq length ] tuple per entry (first nucleotide is 0).
+    # A warning is issued for headers seen more than once.
+
+    my ( $path,   # full path to file with multiple FASTA entries
+       ) = @_;
+
+    # returns a list
+
+    my ( @index, %seen, $fh, $entry, $head, $offset, $seq_len );
+
+    @index = ( "FILE_SIZE=" . &Maasha::Common::file_size( $path ) );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    $offset  = 0;
+    $seq_len = 0;
+
+    while ( $entry = &get_entry( $fh ) )
+    {
+        $head = $entry->[ HEAD ];
+
+        warn qq(WARNING: header->$head alread exists in index) if exists $seen{ $head };
+
+        # skip the previous sequence, then ">" and the header plus its newline
+
+        $offset += $seq_len + 2 + length $head;
+        $seq_len = length $entry->[ SEQ ];
+
+        push @index, [ $head, $offset, $seq_len ];
+
+        $seen{ $head } = 1;
+
+        # account for the newline ending the sequence line
+
+        $offset++;
+    }
+
+    close $fh;
+
+    return @index if wantarray;
+
+    return \@index;
+}
+
+
+sub index_search
+{
+    # Martin A. Hansen, December 2004.
+
+    # Searches the index for entries whose header matches a regex,
+    # or - if inverted - does not match it. Without a regex the
+    # index is returned unchanged.
+
+    my ( $index,    # index list
+         $regex,    # regex to match FASTA headers [OPTIONAL]
+         $invert,   # invert matching
+       ) = @_;
+
+    # returns list
+
+    my ( @tuples, @results );
+
+    if ( not $regex )
+    {
+        @results = @{ $index };
+    }
+    else
+    {
+        # index_create puts a plain "FILE_SIZE=..." string first in the
+        # index; only the [ header, beg, len ] tuples can be matched.
+
+        @tuples = grep { ref $_ eq "ARRAY" } @{ $index };
+
+        if ( $invert ) {
+            @results = grep { $_->[ 0 ] !~ /$regex/ } @tuples;
+        } else {
+            @results = grep { $_->[ 0 ] =~ /$regex/ } @tuples;
+        }
+    }
+
+    return wantarray ? @results : \@results;
+}
+
+
+sub index_lookup
+{
+    # Martin A. Hansen, July 2007.
+
+    # Looks up a list of headers in the index by exact match and returns
+    # the matching [ header, beg, len ] tuples in the order of the
+    # requested headers. Headers not in the index are left out.
+
+    my ( $index,     # index list
+         $headers,   # headers to lookup
+       ) = @_;
+
+    # returns a list
+
+    my ( %hash, $elem, $head, @results );
+
+    foreach $elem ( @{ $index } )
+    {
+        # index_create puts a plain "FILE_SIZE=..." string first in the
+        # index; only the tuples take part in the lookup.
+
+        next if ref $elem ne "ARRAY";
+
+        $hash{ $elem->[ 0 ] } = [ $elem->[ 1 ], $elem->[ 2 ] ];
+    }
+
+    foreach $head ( @{ $headers } )
+    {
+        if ( exists $hash{ $head } ) {
+            push @results, [ $head, $hash{ $head }->[ 0 ], $hash{ $head }->[ 1 ] ];
+        }
+    }
+
+    return wantarray ? @results : \@results;
+}
+
+
+sub index_store
+{
+    # Martin A. Hansen, May 2007.
+
+    # Stores a FASTA index to file by handing the path and
+    # the index over to Maasha::Common::file_store.
+
+    my $path  = shift;   # full path to file
+    my $index = shift;   # list with index
+
+    # No explicit return value.
+
+    &Maasha::Common::file_store( $path, $index );
+}
+
+
+sub index_retrieve
+{
+    # Martin A. Hansen, May 2007.
+
+    # Fetches a FASTA index from file through
+    # Maasha::Common::file_retrieve.
+
+    my ( $path,   # full path to file
+       ) = @_;
+
+    # returns list, or the list reference in scalar context
+
+    my ( $stored );
+
+    $stored = &Maasha::Common::file_retrieve( $path );
+
+    return @{ $stored } if wantarray;
+
+    return $stored;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::GFF;
+
+
+# Copyright (C) 2007-2008 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for manipulating 'Generic Feature Format' (GFF) files.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Maasha::Common;
+
+use vars qw( @ISA @EXPORT_OK );
+
+require Exporter;
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_entry
+{
+    # Martin A. Hansen, February 2008.
+
+    # Reads a single entry from a filehandle to a GFF file.
+    # Lines that do not split into exactly 9 tab separated fields are
+    # skipped. Begin and end are reduced by one and swapped if begin
+    # exceeds end. The attribute column is kept as ATT and is also split
+    # on ";" into key=value pairs stored under "ATT_" plus the
+    # upper-cased key.
+
+    my ( $fh,   # file handle
+       ) = @_;
+
+    # Returns hashref (hash in list context), or nothing at end of file.
+
+    my ( $line, @fields, %entry, $q_beg, $q_end, @atts, $att, $key, $val );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        @fields = split "\t", $line;
+
+        next if @fields != 9;
+
+        $q_beg = $fields[ 3 ] - 1;
+        $q_end = $fields[ 4 ] - 1;
+
+        ( $q_beg, $q_end ) = ( $q_end, $q_beg ) if $q_beg > $q_end;
+
+        %entry = (
+            Q_ID   => $fields[ 0 ],
+            SOURCE => $fields[ 1 ],
+            TYPE   => $fields[ 2 ],
+            Q_BEG  => $q_beg,
+            Q_END  => $q_end,
+            SCORE  => $fields[ 5 ],
+            STRAND => $fields[ 6 ],
+            PHASE  => $fields[ 7 ],
+            ATT    => $fields[ 8 ],
+        );
+
+        @atts = split ";", $fields[ 8 ];
+
+        foreach $att ( @atts )
+        {
+            # Split on the first "=" only, so values that themselves
+            # contain "=" are kept whole.
+
+            ( $key, $val ) = split "=", $att, 2;
+
+            $entry{ "ATT_" . uc $key } = $val;
+        }
+
+        return wantarray ? %entry : \%entry;
+    }
+
+    # no more entries
+
+    return;
+}
+
+
+sub get_entries
+{
+    # Martin A. Hansen, February 2008.
+
+    # Opens a GFF file and collects every entry
+    # that get_entry reads from it.
+
+    my ( $path,   # full path to GFF file.
+       ) = @_;
+
+    # Returns a list.
+
+    my ( $fh, $record, @records );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    push @records, $record while $record = &get_entry( $fh );
+
+    close $fh;
+
+    return @records if wantarray;
+
+    return \@records;
+}
+
+
+sub put_entry
+{
+ # Placeholder: this sub has an empty body, so calling it
+ # writes nothing. Writing of a GFF entry is not implemented here.
+
+
+}
+
+
+sub put_entries
+{
+ # Placeholder: this sub has an empty body, so calling it
+ # writes nothing. Writing of GFF entries is not implemented here.
+
+
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+1;
--- /dev/null
+package Maasha::Match;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines to match sequences
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Storable qw( dclone );
+use Maasha::Common;
+use Maasha::Fasta;
+use Maasha::Seq;
+use Maasha::Berkeley_DB;
+use vars qw ( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub match_mummer
+{
+    # Martin A. Hansen, June 2007.
+
+    # Match sequences using MUMmer. Both lists of entries are written to
+    # temporary FASTA files, mummer is run on these, and its output is
+    # parsed into a list of hit records with the keys Q_ID, Q_LEN, DIR,
+    # S_ID, S_BEG, S_END, Q_BEG, Q_END and HIT_LEN.
+    #
+    # Side effects: spaces in the headers of the given entries are
+    # replaced with "_" in place, and the defaults for "word_size" and
+    # "direction" are stored in the given options hash.
+    #
+    # NOTE: the temporary files have fixed names within the temporary
+    # directory, so concurrent calls sharing a directory would collide.
+
+    my ( $entries1,   # FASTA entries
+         $entries2,   # FASTA entries
+         $options,    # additional MUMmer options - OPTIONAL
+         $tmp_dir,    # temporary directory
+       ) = @_;
+
+    # Returns a list.
+
+    my ( @args, $arg, $file_in1, $file_in2, $cmd, $file_out, $fh, $line, $entry, $result, @results );
+
+    $tmp_dir ||= $ENV{ "TMP_DIR" };
+
+    $options->{ "word_size" } ||= 20;
+    $options->{ "direction" } ||= "both";
+
+    push @args, "-c";
+    push @args, "-L";
+    push @args, "-F";
+    push @args, "-l $options->{ 'word_size' }";
+    push @args, "-maxmatch";
+    push @args, "-n" if not &Maasha::Seq::seq_guess_type( $entries1->[ 0 ]->[ 1 ] ) eq "protein";
+    push @args, "-b" if $options->{ "direction" } =~ /^b/;
+    push @args, "-r" if $options->{ "direction" } =~ /^r/;
+
+    $arg = join " ", @args;
+
+    $file_in1 = "$tmp_dir/muscle1.tmp";
+    $file_in2 = "$tmp_dir/muscle2.tmp";
+    $file_out = "$tmp_dir/muscle3.tmp";
+
+    foreach $entry ( @{ $entries1 }, @{ $entries2 } ) {
+        $entry->[ 0 ] =~ tr/ /_/;
+    }
+
+    &Maasha::Fasta::put_entries( $entries1, $file_in1 );
+    &Maasha::Fasta::put_entries( $entries2, $file_in2 );
+
+    &Maasha::Common::run( "mummer", "$arg $file_in1 $file_in2 > $file_out 2>/dev/null" );
+
+    $fh = &Maasha::Common::read_open( $file_out );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        if ( $line =~ /^> (.+)Reverse\s+Len = (\d+)$/ )
+        {
+            $result->{ "Q_ID" }  = $1;
+            $result->{ "Q_LEN" } = $2;
+            $result->{ "DIR" }   = "reverse";
+        }
+        elsif ( $line =~ /^> (.+)Len = (\d+)$/ )
+        {
+            $result->{ "Q_ID" }  = $1;
+            $result->{ "Q_LEN" } = $2;
+            $result->{ "DIR" }   = "forward";
+        }
+        elsif ( $line =~ /^\s*(.\S+)\s+(\d+)\s+(\d+)\s+(\d+)$/ )
+        {
+            # hit line: positions are turned into 0-based coordinates
+
+            $result->{ "S_ID" }    = $1;
+            $result->{ "S_BEG" }   = $2 - 1;
+            $result->{ "Q_BEG" }   = $3 - 1;
+            $result->{ "HIT_LEN" } = $4;
+            $result->{ "S_END" }   = $result->{ "S_BEG" } + $result->{ "HIT_LEN" } - 1;
+            $result->{ "Q_END" }   = $result->{ "Q_BEG" } + $result->{ "HIT_LEN" } - 1;
+
+            # dclone because $result is reused for the following hits
+
+            push @results, dclone $result;
+        }
+    }
+
+    # close the result file before it is removed
+
+    close $fh;
+
+    unlink $file_in1;
+    unlink $file_in2;
+    unlink $file_out;
+
+    return wantarray ? @results : \@results;
+}
+
+
+sub match_vmatch
+{
+    # Martin A. Hansen, April 2008.
+
+    # Vmatches a list of records against a list of index files and the full
+    # path to the result file is returned.
+    #
+    # Records with SEQ_NAME and SEQ are written to a query file (sequences
+    # shorter than 12 are left out), vmatch is run once per index file, and
+    # the per-index results are merged into one tab separated result file
+    # in which the subject field holds the index file name and the query
+    # field holds the sequence name. With the "count" option the score
+    # field is replaced by the number of hits for the query; with the
+    # "max_hits" option hits for queries with more hits than this are left out.
+    #
+    # Side effect: with the "genome" option the elements of the given
+    # index file list are reduced in place to the file's base name
+    # without the ".fna" suffix.
+
+    my ( $tmp_dir,       # directory in where to save temp files
+         $records,       # list of records
+         $index_files,   # list of index files
+         $options,       # argument hash
+       ) = @_;
+
+    # Returns a string.
+
+    my ( $query_file, $result_file, @result_files, $fh_in, $fh_out, $line, @fields, $i, $record, $vmatch_args, @seq_names, $count_list );
+
+    $query_file  = "$tmp_dir/query.seq";
+    $result_file = "$tmp_dir/vmatch.out";
+
+    $fh_out = &Maasha::Common::write_open( $query_file );
+
+    foreach $record ( @{ $records } )
+    {
+        if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } )
+        {
+            next if length $record->{ "SEQ" } < 12;   # assuming that the index is created for 12 as minimum length
+
+            # the position in this list is what the query field of the
+            # vmatch output is translated back to a name with below
+
+            push @seq_names, $record->{ "SEQ_NAME" };
+
+            &Maasha::Fasta::put_entry( [ $record->{ "SEQ_NAME" }, $record->{ "SEQ" } ], $fh_out, 80 );
+        }
+    }
+
+    close $fh_out;
+
+    if ( $options->{ 'genome' } ) {
+        $vmatch_args = "-complete -d -p -q $query_file";
+    } else {
+        $vmatch_args = "-complete -d -p -showdesc 100 -q $query_file";
+    }
+
+    $vmatch_args .= " -h " . $options->{ "hamming_dist" } if $options->{ "hamming_dist" };
+    $vmatch_args .= " -e " . $options->{ "edit_dist" }    if $options->{ "edit_dist" };
+
+    for ( $i = 0; $i < @{ $index_files }; $i++ )
+    {
+        &Maasha::Common::run( "vmatch", "$vmatch_args $index_files->[ $i ] > $result_file.$i" );
+
+        push @result_files, "$result_file.$i";
+    }
+
+    unlink $query_file;
+
+    # The hit counts are needed for the "max_hits" filter as well as for
+    # the "count" option; without them "max_hits" alone filters nothing.
+
+    $count_list = &vmatch_count_hits( \@result_files ) if $options->{ "count" } or $options->{ "max_hits" };
+
+    $fh_out = &Maasha::Common::write_open( $result_file );
+
+    for ( $i = 0; $i < @{ $index_files }; $i++ )
+    {
+        $index_files->[ $i ] =~ s/.+\/(.+)\.fna$/$1/ if $options->{ 'genome' };
+
+        $fh_in = &Maasha::Common::read_open( "$result_file.$i" );
+
+        while ( $line = <$fh_in> )
+        {
+            chomp $line;
+
+            next if $line =~ /^#/;
+
+            @fields = split " ", $line;
+
+            next if $options->{ "max_hits" } and $count_list->[ $fields[ 5 ] ] > $options->{ 'max_hits' };
+
+            $fields[ 1 ] = $index_files->[ $i ];                                      # S_ID
+            $fields[ 9 ] = $count_list->[ $fields[ 5 ] ] if $options->{ "count" };    # SCORE
+            $fields[ 5 ] = $seq_names[ $fields[ 5 ] ];                                # Q_ID
+
+            print $fh_out join( "\t", @fields ), "\n";
+        }
+
+        close $fh_in;
+
+        unlink "$result_file.$i";
+    }
+
+    close $fh_out;
+
+    return $result_file;
+}
+
+
+sub vmatch_count_hits
+{
+    # Martin A. Hansen, April 2008.
+
+    # Runs through a list of Vmatch result files and counts, over all of
+    # the files, how many non-comment lines carry each query number (the
+    # sixth whitespace separated field). The counts are kept in a list
+    # that is indexed by that query number.
+
+    my ( $files,   # vmatch result files
+       ) = @_;
+
+    # Returns a list.
+
+    my ( @hits, $path, $fh, $row, @cols );
+
+    foreach $path ( @{ $files } )
+    {
+        $fh = &Maasha::Common::read_open( $path );
+
+        while ( $row = <$fh> )
+        {
+            chomp $row;
+
+            next if $row =~ /^#/;
+
+            @cols = split " ", $row;
+
+            $hits[ $cols[ 5 ] ] += 1;
+        }
+
+        close $fh;
+    }
+
+    return @hits if wantarray;
+
+    return \@hits;
+}
+
+
+sub vmatch_count_hits_old
+{
+ # Martin A. Hansen, April 2008.
+
+ # Given a Vmatch result file, substitute the
+ # score field with the times the query sequence
+ # was found.
+
+ # NOTE: a second sub with this same name is defined further down in
+ # this package. Perl keeps the definition it compiles last, so this
+ # array based version is replaced by the later one.
+
+ # The file is read twice: the first pass counts the lines per query
+ # number (sixth field), the second pass writes every non-comment line
+ # tab separated to "$tmp_dir/vmatch.count" with the count in the tenth
+ # field. That file finally replaces the original.
+
+ my ( $tmp_dir, # directory in where to save temp files
+ $path, # full path to vmatch file
+ $max_count, # filter too abundant seqs - OPTIONAL
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $fh_in, $fh_out, $line, @fields, @count_list );
+
+ # first pass: count hits per query number
+
+ $fh_in = &Maasha::Common::read_open( $path );
+
+ while ( $line = <$fh_in> )
+ {
+ chomp $line;
+
+ next if $line =~ /^#/;
+
+ @fields = split " ", $line;
+
+ $count_list[ $fields[ 5 ] ]++;
+ }
+
+ close $fh_in;
+
+ # second pass: rewrite the lines with the count as score
+
+ $fh_in = &Maasha::Common::read_open( $path );
+ $fh_out = &Maasha::Common::write_open( "$tmp_dir/vmatch.count" );
+
+ while ( $line = <$fh_in> )
+ {
+ chomp $line;
+
+ next if $line =~ /^#/;
+
+ @fields = split " ", $line;
+
+ $fields[ 9 ] = $count_list[ $fields[ 5 ] ];
+
+ # with a max count, lines for queries found more often are dropped
+
+ if ( $max_count ) {
+ print $fh_out join( "\t", @fields ), "\n" if $fields[ 9 ] <= $max_count;
+ } else {
+ print $fh_out join( "\t", @fields ), "\n";
+ }
+ }
+
+ close $fh_in;
+ close $fh_out;
+
+ rename "$tmp_dir/vmatch.count", $path;
+}
+
+
+sub vmatch_count_hits_old
+{
+    # Martin A. Hansen, April 2008.
+
+    # Replaces the score field (tenth field) of every non-comment line in
+    # a Vmatch result file with the number of lines sharing that line's
+    # query field (sixth field). The file is read twice - once to count
+    # and once to rewrite - and the rewritten lines, tab separated, go to
+    # "$tmp_dir/vmatch.count", which finally replaces the original file.
+    #
+    # With a max count the counts live in a plain hash and lines whose
+    # count exceeds the max are dropped. Without one the count hash is
+    # set up through Maasha::Berkeley_DB::db_init on "$tmp_dir/hash.bdb",
+    # which is untied and removed again at the end.
+
+    my ( $tmp_dir,     # directory in where to save temp files
+         $path,        # full path to vmatch file
+         $max_count,   # filter too abundant seqs - OPTIONAL
+       ) = @_;
+
+    # No explicit return value.
+
+    my ( %count_hash, $fh_in, $fh_out, $row, @cols );
+
+    %count_hash = $max_count ? () : &Maasha::Berkeley_DB::db_init( "$tmp_dir/hash.bdb" );
+
+    # first pass: count
+
+    $fh_in = &Maasha::Common::read_open( $path );
+
+    while ( $row = <$fh_in> )
+    {
+        chomp $row;
+
+        next if $row =~ /^#/;
+
+        @cols = split " ", $row;
+
+        $count_hash{ $cols[ 5 ] }++;
+    }
+
+    close $fh_in;
+
+    # second pass: rewrite
+
+    $fh_in  = &Maasha::Common::read_open( $path );
+    $fh_out = &Maasha::Common::write_open( "$tmp_dir/vmatch.count" );
+
+    while ( $row = <$fh_in> )
+    {
+        chomp $row;
+
+        next if $row =~ /^#/;
+
+        @cols = split " ", $row;
+
+        $cols[ 9 ] = $count_hash{ $cols[ 5 ] };
+
+        next if $max_count and not $cols[ 9 ] <= $max_count;
+
+        print $fh_out join( "\t", @cols ), "\n";
+    }
+
+    close $fh_in;
+    close $fh_out;
+
+    if ( not $max_count )
+    {
+        untie %count_hash;
+        unlink "$tmp_dir/hash.bdb";
+    }
+
+    rename "$tmp_dir/vmatch.count", $path;
+}
+
+
+sub vmatch_get_entry
+{
+    # Martin A. Hansen, January 2008.
+
+    # Reads the next record from a tab separated vmatch result file,
+    # skipping comment lines (starting with #), and parses it into a
+    # record with the keys REC_TYPE, S_LEN, S_ID, S_BEG, STRAND, Q_LEN,
+    # Q_ID, Q_BEG, MATCH_DIST, E_VAL, SCORE, IDENT, Q_END and S_END.
+
+    my ( $fh,   # file handle to vmatch result file.
+       ) = @_;
+
+    # Returns a hash (hashref in scalar context), or nothing (undef in
+    # scalar context) when no more records are left.
+
+    my ( $line, @fields, %record );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $line =~ /^#/;
+
+        @fields = split "\t", $line;
+
+        $record{ "REC_TYPE" }   = "VMATCH";
+
+        $record{ "S_LEN" }      = $fields[ 0 ];
+        $record{ "S_ID" }       = $fields[ 1 ];
+        $record{ "S_BEG" }      = $fields[ 2 ];
+
+        # Field 3 equal to "D" is reported as the plus strand, anything else as minus.
+
+        $record{ "STRAND" }     = $fields[ 3 ] eq "D" ? "+" : "-";
+
+        $record{ "Q_LEN" }      = $fields[ 4 ];
+        $record{ "Q_ID" }       = $fields[ 5 ];
+        $record{ "Q_BEG" }      = $fields[ 6 ];
+        $record{ "MATCH_DIST" } = $fields[ 7 ];
+        $record{ "E_VAL" }      = $fields[ 8 ];
+        $record{ "SCORE" }      = $fields[ 9 ];
+        $record{ "IDENT" }      = $fields[ 10 ];
+
+        # End positions are inclusive.
+
+        $record{ "Q_END" }      = $record{ "Q_BEG" } + $record{ "Q_LEN" } - 1;
+        $record{ "S_END" }      = $record{ "S_BEG" } + $record{ "S_LEN" } - 1;
+
+        return wantarray ? %record : \%record;
+    }
+
+    # End of file: return explicitly, the implicit value of a sub
+    # ending in a loop is unspecified.
+
+    return;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+package Maasha::Matrix;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# This module contains subroutines for simple matrix manipulations.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Storable qw( dclone );
+use Maasha::Common;
+use Maasha::Calc;
+use vars qw ( @ISA @EXPORT );
+use Exporter;
+
+@ISA = qw( Exporter );
+
+use constant {
+ ROWS => 0,
+ COLS => 1,
+};
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub matrix_dims
+{
+    # Martin A. Hansen, April 2007
+
+    # Returns the dimensions of a matrix: rows x cols.
+    # The column count is taken from the first row only, so an uneven
+    # matrix is not detected here. An empty matrix is 0 x 0.
+
+    my ( $matrix,   # AoA data structure
+       ) = @_;
+
+    # returns a tuple (arrayref in scalar context)
+
+    my ( $rows, $cols );
+
+    $rows = scalar @{ $matrix };
+
+    # Only look at the first row if there is one, dereferencing a
+    # missing row is fatal.
+
+    $cols = $rows > 0 ? scalar @{ $matrix->[ 0 ] } : 0;
+
+    return wantarray ? ( $rows, $cols ) : [ $rows, $cols ];
+}
+
+
+sub matrix_check
+{
+    # Martin A. Hansen, April 2007.
+
+    # Checks that all rows of a matrix have the same number of
+    # columns as reported by matrix_dims (i.e. as the first row).
+
+    my ( $matrix,   # AoA data structure
+       ) = @_;
+
+    # returns boolean: 1 if even, else 0
+
+    my $width = &matrix_dims( $matrix )->[ COLS ];
+
+    for my $cells ( @{ $matrix } )
+    {
+        return 0 unless scalar @{ $cells } == $width;
+    }
+
+    return 1;
+}
+
+
+sub matrix_summary
+{
+    # Martin A. Hansen, April 2007.
+
+    # Prints a tab separated summary table with one line per column
+    # of the given matrix: type (num|alph), number of rows, number of
+    # unique values, whether the column is sorted, and - for numeric
+    # columns only - minimum, maximum and mean (else N/A).
+    # Dies if the matrix is uneven.
+
+    my ( $matrix,   # AoA data structure
+       ) = @_;
+
+    my ( $dims, $column, $values, $kind, $sorted, $distinct, $low, $high, $average );
+
+    die qq(ERROR: cannot summarize uneven matrix\n) if not &matrix_check( $matrix );
+
+    $dims = &matrix_dims( $matrix );
+
+    print join( "\t", "TYPE", "LEN", "UNIQ", "SORT", "MIN", "MAX", "MEAN" ), "\n";
+
+    for my $idx ( 0 .. $dims->[ COLS ] - 1 )
+    {
+        # Pull the column out as a flat list.
+
+        $column = &cols_get( $matrix, $idx, $idx );
+        $values = &matrix_flip( $column )->[ 0 ];
+
+        $kind   = &list_check_numeric( $values ) ? "num" : "alph";
+        $sorted = &list_check_sort( $values, $kind ) ? "yes" : "no";
+
+        if ( $kind ne "num" )
+        {
+            ( $low, $high, $average ) = ( "N/A", "N/A", "N/A" );
+        }
+        else
+        {
+            # A sorted column has its extremes at the ends.
+
+            if ( $sorted eq "yes" ) {
+                ( $low, $high ) = ( $values->[ 0 ], $values->[ -1 ] );
+            } else {
+                ( $low, $high ) = &Maasha::Calc::minmax( $values );
+            }
+
+            $average = sprintf( "%.2f", &Maasha::Calc::mean( $values ) );
+        }
+
+        $distinct = &list_uniq( $values );
+
+        print join( "\t", $kind, $dims->[ ROWS ], $distinct, $sorted, $low, $high, $average ), "\n";
+    }
+}
+
+
+sub matrix_flip
+{
+    # Martin A. Hansen, April 2007
+
+    # Transposes a matrix: rows become columns and vice versa.
+    # Dies if the matrix is uneven. The given matrix is not modified.
+
+    my ( $matrix,   # AoA data structure
+       ) = @_;
+
+    # returns AoA
+
+    my ( $dims, $flipped );
+
+    die qq(ERROR: cannot flip uneven matrix\n) if not &matrix_check( $matrix );
+
+    $dims = &matrix_dims( $matrix );
+
+    for my $r ( 0 .. $dims->[ ROWS ] - 1 )
+    {
+        for my $c ( 0 .. $dims->[ COLS ] - 1 )
+        {
+            $flipped->[ $c ]->[ $r ] = $matrix->[ $r ]->[ $c ];
+        }
+    }
+
+    return wantarray ? @{ $flipped } : $flipped;
+}
+
+
+sub matrix_rotate_right
+{
+    # Martin A. Hansen, April 2007
+
+    # Rotates the columns of a matrix a given number of positions to
+    # the right: for each position the rightmost column is taken off
+    # and prefixed to the remaining columns. Dies on an uneven matrix.
+
+    my ( $matrix,   # AoA data structure
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, $done, $head, $tail );
+
+    $shift ||= 1;
+
+    die qq(ERROR: cannot right rotate uneven matrix\n) if not &matrix_check( $matrix );
+
+    $last = &matrix_dims( $matrix )->[ COLS ] - 1;   # index of the rightmost column
+
+    $done = 0;
+
+    while ( $done < $shift )
+    {
+        $tail = &cols_get( $matrix, $last, $last );
+        $head = &cols_get( $matrix, 0, $last - 1 );
+
+        &cols_unshift( $head, $tail );
+
+        $matrix = $head;
+
+        $done++;
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub matrix_rotate_left
+{
+    # Martin A. Hansen, April 2007
+
+    # Rotates the columns of a matrix a given number of positions to
+    # the left: for each position the leftmost column is taken off
+    # and appended to the remaining columns. Dies on an uneven matrix.
+
+    my ( $matrix,   # AoA data structure
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $i, $dims, $col, $AoA );
+
+    $shift ||= 1;
+
+    # The message used to say "right rotate" (copied from matrix_rotate_right).
+
+    die qq(ERROR: cannot left rotate uneven matrix\n) if not &matrix_check( $matrix );
+
+    $dims = &matrix_dims( $matrix );
+
+    for ( $i = 0; $i < $shift; $i++ )
+    {
+        $col = &cols_get( $matrix, 0, 0 );
+        $AoA = &cols_get( $matrix, 1, $dims->[ COLS ] - 1 );
+
+        &cols_push( $AoA, $col );
+
+        $matrix = $AoA;
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub matrix_rotate_up
+{
+    # Martin A. Hansen, April 2007
+
+    # Rotates the rows of a matrix a given number of positions up:
+    # for each position the top row is taken off and a deep copy of
+    # it is appended below the remaining rows.
+
+    my ( $matrix,   # AoA data structure
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, $done, $top, $rest );
+
+    $shift ||= 1;
+
+    $last = &matrix_dims( $matrix )->[ ROWS ] - 1;   # index of the bottom row
+
+    $done = 0;
+
+    while ( $done < $shift )
+    {
+        $top  = &rows_get( $matrix, 0, 0 );
+        $rest = &rows_get( $matrix, 1, $last );
+
+        &rows_push( $rest, dclone( $top ) );
+
+        $matrix = $rest;
+
+        $done++;
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub matrix_rotate_down
+{
+    # Martin A. Hansen, April 2007
+
+    # Rotates the rows of a matrix a given number of positions down:
+    # for each position the bottom row is taken off and prefixed to
+    # the remaining rows.
+
+    my ( $matrix,   # AoA data structure
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, $done, $bottom, $rest );
+
+    $shift ||= 1;
+
+    $last = &matrix_dims( $matrix )->[ ROWS ] - 1;   # index of the bottom row
+
+    $done = 0;
+
+    while ( $done < $shift )
+    {
+        $bottom = &rows_get( $matrix, $last, $last );
+        $rest   = &rows_get( $matrix, 0, $last - 1 );
+
+        &rows_unshift( $rest, $bottom );
+
+        $matrix = $rest;
+
+        $done++;
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub submatrix
+{
+    # Martin A. Hansen, April 2007
+
+    # Slices a rectangular region out of a matrix: first the
+    # requested rows are picked with rows_get, then the requested
+    # columns of those rows with cols_get.
+
+    my ( $matrix,    # AoA data structure
+         $row_beg,   # first row - OPTIONAL (default 0)
+         $row_end,   # last row - OPTIONAL (default last row)
+         $col_beg,   # first col - OPTIONAL (default 0)
+         $col_end,   # last col - OPTIONAL (default last col)
+       ) = @_;
+
+    # returns AoA
+
+    my $band   = &rows_get( $matrix, $row_beg, $row_end );
+    my $region = &cols_get( $band, $col_beg, $col_end );
+
+    return $region unless wantarray;
+
+    return @{ $region };
+}
+
+
+sub row_get
+{
+    # Martin A. Hansen, April 2008.
+
+    # Returns a copy of a single row from a given matrix.
+    # Calls Maasha::Common::error if the row index is outside
+    # of the matrix.
+
+    my ( $matrix,   # AoA data structure
+         $row,      # row to get (0-based)
+       ) = @_;
+
+    # Returns a list (arrayref in scalar context).
+
+    my ( $dims, @list );
+
+    $dims = &matrix_dims( $matrix );
+
+    # Rows are 0-based, so the row count itself is already outside.
+
+    &Maasha::Common::error( qq(Row->$row outside of matrix->$dims->[ ROWS ]) ) if $row >= $dims->[ ROWS ];
+
+    @list = @{ $matrix->[ $row ] };
+
+    return wantarray ? @list : \@list;
+}
+
+
+sub rows_get
+{
+    # Martin A. Hansen, April 2007
+
+    # Returns a range of requested rows from a given matrix.
+    # The returned list is new, but the rows in it are the very
+    # same row references as in the given matrix.
+    # A row end beyond the matrix is clipped to the last row with a
+    # warning. Dies if row begin is larger than row end.
+
+    my ( $matrix,    # AoA data structure
+         $row_beg,   # first row - OPTIONAL (default 0)
+         $row_end,   # last row - OPTIONAL (default last row)
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, @rows );
+
+    $last = scalar( @{ $matrix } ) - 1;   # index of the last row
+
+    $row_beg ||= 0;
+
+    # Default to the last row itself (not one past it), so that
+    # leaving out the row end does not trigger the warning below.
+
+    $row_end = $last if not defined $row_end;
+
+    if ( $row_end > $last )
+    {
+        warn qq(WARNING: row end larger than matrix\n);
+        $row_end = $last;
+    }
+
+    die qq(ERROR: row begin "$row_beg" larger than row end "$row_end"\n) if $row_end < $row_beg;
+
+    @rows = @{ $matrix }[ $row_beg .. $row_end ];
+
+    return wantarray ? @rows : \@rows;
+}
+
+
+sub col_get
+{
+    # Martin A. Hansen, April 2008.
+
+    # Returns a single column from a given matrix as a flat list.
+    # Calls Maasha::Common::error if the column index is outside
+    # of the matrix.
+
+    my ( $matrix,   # AoA data structure
+         $col,      # column to get (0-based)
+       ) = @_;
+
+    # Returns a list (arrayref in scalar context).
+
+    my ( $dims, $i, @list );
+
+    $dims = &matrix_dims( $matrix );
+
+    # Columns are 0-based, so the column count itself is already outside.
+
+    &Maasha::Common::error( qq(Column->$col outside of matrix->$dims->[ COLS ]) ) if $col >= $dims->[ COLS ];
+
+    for ( $i = 0; $i < $dims->[ ROWS ]; $i++ ) {
+        push @list, $matrix->[ $i ]->[ $col ];
+    }
+
+    return wantarray ? @list : \@list;
+}
+
+
+sub cols_get
+{
+    # Martin A. Hansen, April 2007
+
+    # Returns a range of requested columns from a given matrix.
+    # A column end beyond the matrix is clipped to the last column
+    # with a warning. Dies if column begin is larger than column end.
+    #
+    # Note that when the range covers all columns the returned rows
+    # are the very same row references as in the given matrix; for
+    # any narrower range the rows are fresh copies.
+
+    my ( $matrix,    # AoA data structure
+         $col_beg,   # first column - OPTIONAL (default 0)
+         $col_end,   # last column - OPTIONAL (default last column)
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, @AoA );
+
+    $last = &matrix_dims( $matrix )->[ COLS ] - 1;   # index of the last column
+
+    $col_beg ||= 0;
+
+    $col_end = $last if not defined $col_end;
+
+    if ( $col_end > $last )
+    {
+        warn qq(WARNING: column end larger than matrix\n);
+        $col_end = $last;
+    }
+
+    die qq(ERROR: column begin "$col_beg" larger than column end "$col_end"\n) if $col_end < $col_beg;
+
+    if ( $col_beg == 0 and $col_end == $last ) {
+        @AoA = @{ $matrix };
+    } else {
+        @AoA = map { [ @{ $_ }[ $col_beg .. $col_end ] ] } @{ $matrix };
+    }
+
+    return wantarray ? @AoA : \@AoA;
+}
+
+
+sub col_sum
+{
+    # Returns the sum of the values in a given column of a matrix.
+    # Dies if the column is not numerical according to
+    # list_check_numeric.
+
+    my ( $matrix,   # AoA data structure
+         $col,      # column to sum
+       ) = @_;
+
+    # returns the value of Maasha::Calc::sum
+
+    my ( $list, $sum );
+
+    # Pull the column out as a flat list.
+
+    $list = &cols_get( $matrix, $col, $col );
+    $list = &matrix_flip( $list )->[ 0 ];
+
+    # This die used to be unconditional, which made the sub fail on
+    # every call.
+
+    die qq(ERROR: cannot sum non-nummerical column\n) if not &list_check_numeric( $list );
+
+    $sum = &Maasha::Calc::sum( $list );
+
+    return $sum;
+}
+
+
+sub rows_push
+{
+    # Martin A. Hansen, April 2007.
+
+    # Appends one or more rows to the bottom of a matrix.
+    # The given matrix is modified in place.
+
+    my ( $matrix,   # AoA data structure
+         $rows,     # list of rows
+       ) = @_;
+
+    # returns AoA
+
+    # Insert the new rows right after the current last row.
+
+    splice @{ $matrix }, scalar @{ $matrix }, 0, @{ $rows };
+
+    return $matrix unless wantarray;
+
+    return @{ $matrix };
+}
+
+
+sub rows_unshift
+{
+    # Martin A. Hansen, April 2007.
+
+    # Prefixes one or more rows to the top of a matrix.
+    # The given matrix is modified in place.
+
+    my ( $matrix,   # AoA data structure
+         $rows,     # list of rows
+       ) = @_;
+
+    # returns AoA
+
+    # Insert the new rows in front of the current first row.
+
+    splice @{ $matrix }, 0, 0, @{ $rows };
+
+    return $matrix unless wantarray;
+
+    return @{ $matrix };
+}
+
+
+sub cols_push
+{
+    # Martin A. Hansen, April 2007.
+
+    # Appends the columns of one matrix to the right of another,
+    # row by row. The given matrix is modified in place.
+    # Dies if the two do not have the same number of rows.
+
+    my ( $matrix,   # AoA data structure
+         $cols,     # AoA with the columns to append
+       ) = @_;
+
+    # returns AoA
+
+    my ( $rows_matrix, $rows_cols );
+
+    $rows_matrix = &matrix_dims( $matrix )->[ ROWS ];
+    $rows_cols   = &matrix_dims( $cols )->[ ROWS ];
+
+    die qq(ERROR: Cannot merge columns with different row count\n) if $rows_matrix != $rows_cols;
+
+    for my $r ( 0 .. $rows_matrix - 1 )
+    {
+        push @{ $matrix->[ $r ] }, @{ $cols->[ $r ] };
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub cols_unshift
+{
+    # Martin A. Hansen, April 2007.
+
+    # Prefixes the columns of one matrix to the left of another,
+    # row by row. The given matrix is modified in place.
+    # Dies if the two do not have the same number of rows.
+
+    my ( $matrix,   # AoA data structure
+         $cols,     # AoA with the columns to prefix
+       ) = @_;
+
+    # returns AoA
+
+    my ( $rows_matrix, $rows_cols );
+
+    $rows_matrix = &matrix_dims( $matrix )->[ ROWS ];
+    $rows_cols   = &matrix_dims( $cols )->[ ROWS ];
+
+    die qq(ERROR: Cannot merge columns with different row count\n) if $rows_matrix != $rows_cols;
+
+    for my $r ( 0 .. $rows_matrix - 1 )
+    {
+        unshift @{ $matrix->[ $r ] }, @{ $cols->[ $r ] };
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub rows_rotate_left
+{
+    # Martin A. Hansen, April 2007.
+
+    # Given a matrix and a range of rows, rotates these rows
+    # left by moving a given number of elements, one at a time,
+    # from the first position to the last. Each rotated row is
+    # replaced in the given matrix by a new list.
+    # Dies if the last row is outside of the matrix.
+
+    my ( $matrix,   # AoA data structure
+         $beg,      # first row to shift
+         $end,      # last row to shift
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $i, $c, @row );
+
+    $shift ||= 1;
+
+    die qq(ERROR: row outside matrix\n) if $end >= scalar @{ $matrix };
+
+    for ( $i = $beg; $i <= $end; $i++ )
+    {
+        # Rotate a copy of the row itself. Rotating the result of the
+        # previous rotation used to break for more than one shift,
+        # because the flat row was handed on as if it was a list of rows.
+
+        @row = @{ $matrix->[ $i ] };
+
+        for ( $c = 0; $c < $shift; $c++ ) {
+            push @row, shift @row;
+        }
+
+        $matrix->[ $i ] = [ @row ];
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub rows_rotate_right
+{
+    # Martin A. Hansen, April 2007.
+
+    # Given a matrix and a range of rows, rotates these rows
+    # right by moving a given number of elements, one at a time,
+    # from the last position to the first. Each rotated row is
+    # replaced in the given matrix by a new list.
+    # Dies if end is smaller than beg or outside of the matrix.
+
+    my ( $matrix,   # AoA data structure
+         $beg,      # first row to shift
+         $end,      # last row to shift
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $i, $c, @row );
+
+    $shift ||= 1;
+
+    die qq(ERROR: end < beg: $end < $beg\n) if $end < $beg;
+    die qq(ERROR: row outside matrix\n)     if $end >= scalar @{ $matrix };
+
+    for ( $i = $beg; $i <= $end; $i++ )
+    {
+        # Rotate a copy of the row itself. Rotating the result of the
+        # previous rotation used to break for more than one shift,
+        # because the flat row was handed on as if it was a list of rows.
+
+        @row = @{ $matrix->[ $i ] };
+
+        for ( $c = 0; $c < $shift; $c++ ) {
+            unshift @row, pop @row;
+        }
+
+        $matrix->[ $i ] = [ @row ];
+    }
+
+    return wantarray ? @{ $matrix } : $matrix;
+}
+
+
+sub cols_rotate_up
+{
+    # Martin A. Hansen, April 2007.
+
+    # Given a matrix and a range of columns, rotates each of these
+    # columns up by moving the top cell to the bottom a given number
+    # of times. The result is assembled from the columns before the
+    # range, the rotated columns and the columns after the range.
+
+    my ( $matrix,   # AoA data structure
+         $beg,      # first column to shift
+         $end,      # last column to shift
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, $left, $right, $idx, $step, $column, $cells );
+
+    $shift ||= 1;
+
+    $last = &matrix_dims( $matrix )->[ COLS ] - 1;   # index of the last column
+
+    # Untouched columns on either side of the range, if any.
+
+    $left  = &cols_get( $matrix, 0, $beg - 1 )     if $beg > 0;
+    $right = &cols_get( $matrix, $end + 1, $last ) if $end < $last;
+
+    for ( $idx = $beg; $idx <= $end; $idx++ )
+    {
+        # Turn the column into a flat list, rotate, and turn it back.
+
+        $column = &cols_get( $matrix, $idx, $idx );
+        $cells  = &matrix_flip( $column )->[ 0 ];
+
+        for ( $step = 0; $step < $shift; $step++ ) {
+            $cells = &list_rotate_left( $cells );
+        }
+
+        $column = &matrix_flip( [ $cells ] );
+
+        if ( $left ) {
+            &cols_push( $left, $column );
+        } else {
+            $left = $column;
+        }
+    }
+
+    &cols_push( $left, $right ) if $right;
+
+    return wantarray ? @{ $left } : $left;
+}
+
+
+sub cols_rotate_down
+{
+    # Martin A. Hansen, April 2007.
+
+    # Given a matrix and a range of columns, rotates each of these
+    # columns down by moving the bottom cell to the top a given number
+    # of times. The result is assembled from the columns before the
+    # range, the rotated columns and the columns after the range.
+
+    my ( $matrix,   # AoA data structure
+         $beg,      # first column to shift
+         $end,      # last column to shift
+         $shift,    # number of shifts - DEFAULT=1
+       ) = @_;
+
+    # returns AoA
+
+    my ( $last, $left, $right, $idx, $step, $column, $cells );
+
+    $shift ||= 1;
+
+    $last = &matrix_dims( $matrix )->[ COLS ] - 1;   # index of the last column
+
+    # Untouched columns on either side of the range, if any.
+
+    $left  = &cols_get( $matrix, 0, $beg - 1 )     if $beg > 0;
+    $right = &cols_get( $matrix, $end + 1, $last ) if $end < $last;
+
+    for ( $idx = $beg; $idx <= $end; $idx++ )
+    {
+        # Turn the column into a flat list, rotate, and turn it back.
+
+        $column = &cols_get( $matrix, $idx, $idx );
+        $cells  = &matrix_flip( $column )->[ 0 ];
+
+        for ( $step = 0; $step < $shift; $step++ ) {
+            $cells = &list_rotate_right( $cells );
+        }
+
+        $column = &matrix_flip( [ $cells ] );
+
+        if ( $left ) {
+            &cols_push( $left, $column );
+        } else {
+            $left = $column;
+        }
+    }
+
+    &cols_push( $left, $right ) if $right;
+
+    return wantarray ? @{ $left } : $left;
+}
+
+
+sub list_rotate_left
+{
+    # Martin A. Hansen, April 2007.
+
+    # Returns a copy of a given list where the first element
+    # has been moved to the end. The given list is not modified.
+
+    my ( $list,   # list to rotate
+       ) = @_;
+
+    # returns a list (arrayref in scalar context)
+
+    my @rotated = @{ $list };
+
+    push @rotated, shift @rotated;
+
+    return \@rotated unless wantarray;
+
+    return @rotated;
+}
+
+
+sub list_rotate_right
+{
+    # Martin A. Hansen, April 2007.
+
+    # Returns a copy of a given list where the last element
+    # has been moved to the front. The given list is not modified.
+
+    my ( $list,   # list to rotate
+       ) = @_;
+
+    # returns a list (arrayref in scalar context)
+
+    my @rotated = @{ $list };
+
+    unshift @rotated, pop @rotated;
+
+    return \@rotated unless wantarray;
+
+    return @rotated;
+}
+
+
+sub list_check_numeric
+{
+    # Martin A. Hansen, April 2007.
+
+    # Checks if a given list only contains numerical elements:
+    # integers and decimal numbers with an optional sign and an
+    # optional exponent (e.g. 42, -3, 1.5, .5, 123.2312e-03).
+    # An empty list counts as numerical.
+
+    my ( $list,   # list to check
+       ) = @_;
+
+    # returns integer: 1 if numerical, else 0
+
+    my ( $elem );
+
+    foreach $elem ( @{ $list } )
+    {
+        # optional sign, digits with optional fraction (or a bare
+        # fraction), optional exponent.
+
+        return 0 if not $elem =~ /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/;
+    }
+
+    return 1;
+}
+
+
+sub list_check_sort
+{
+    # Martin A. Hansen, April 2007.
+
+    # Checks if a given list is sorted in ascending order (equal
+    # neighbours are fine). A sort type starting with "a" (any case)
+    # means alphabetical comparison, any other given type means
+    # numerical. If no type is given the list is compared numerically
+    # when list_check_numeric accepts it, else alphabetically.
+
+    my ( $list,   # list to check
+         $type,   # numerical or alphabetical - OPTIONAL
+       ) = @_;
+
+    # returns integer: 1 if sorted, else 0
+
+    my ( $numeric, $idx );
+
+    if ( $type ) {
+        $numeric = $type =~ /^a.*/i ? 0 : 1;
+    } else {
+        $numeric = &list_check_numeric( $list ) ? 1 : 0;
+    }
+
+    # Compare every element with its predecessor.
+
+    for $idx ( 1 .. $#{ $list } )
+    {
+        if ( $numeric ) {
+            return 0 if $list->[ $idx - 1 ] > $list->[ $idx ];
+        } else {
+            return 0 if $list->[ $idx - 1 ] gt $list->[ $idx ];
+        }
+    }
+
+    return 1;
+}
+
+
+sub list_uniq
+{
+    # Martin A. Hansen, April 2007.
+
+    # Counts the unique elements in a given list
+    # (elements are compared as strings).
+
+    my ( $list,   # list
+       ) = @_;
+
+    # returns integer
+
+    my %seen;
+
+    # Every distinct element ends up as exactly one hash key.
+
+    @seen{ @{ $list } } = ();
+
+    return scalar keys %seen;
+}
+
+
+sub tabulate
+{
+    # Martin A. Hansen, April 2007.
+
+    # Prints a frequency table for a given column of a matrix: one
+    # line per distinct value with the value (padded to the width of
+    # the longest value), its count, and its share of all rows in
+    # percent. The lines are sorted numerically if the column is
+    # numerical according to list_check_numeric, else alphabetically.
+
+    my ( $matrix,   # AoA data structure
+         $col,      # column to tabulate
+       ) = @_;
+
+    my ( $total, $values, %count_of, $width, $value, @keys, $pad, $percent );
+
+    $total = &matrix_dims( $matrix )->[ ROWS ];
+
+    # Pull the column out as a flat list.
+
+    $values = &cols_get( $matrix, $col, $col );
+    $values = &matrix_flip( $values )->[ 0 ];
+
+    $width = 0;
+
+    foreach $value ( @{ $values } )
+    {
+        $count_of{ $value }++;
+
+        $width = length( $value ) if length( $value ) > $width;
+    }
+
+    if ( &list_check_numeric( $values ) ) {
+        @keys = sort { $a <=> $b } keys %count_of;
+    } else {
+        @keys = sort { $a cmp $b } keys %count_of;
+    }
+
+    foreach $value ( @keys )
+    {
+        $pad     = " " x ( $width - length( $value ) );
+        $percent = ( $count_of{ $value } / $total ) * 100;
+
+        print $value, $pad,
+              sprintf( " %6s ", $count_of{ $value } ),
+              sprintf( "%.2f\n", $percent );
+    }
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BINARY SEARCH <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub interval_search
+{
+    # Martin A. Hansen, February 2008.
+
+    # Binary search for the interval containing a given number.
+    # Each matrix row holds one interval with its begin and end
+    # (both inclusive) in two separate columns. The search only makes
+    # sense if the rows are ordered by these positions.
+
+    my ( $matrix,   # data structure
+         $col1,     # column with interval begins
+         $col2,     # column with interval ends
+         $num,      # number to search for
+       ) = @_;
+
+    # Returns an integer: the index of the matching row, or -1 if
+    # no interval contains the number.
+
+    my ( $lo, $hi, $mid, $row );
+
+    # Search window is [ $lo, $hi [ - the upper bound is exclusive.
+
+    ( $lo, $hi ) = ( 0, scalar @{ $matrix } );
+
+    while ( $lo < $hi )
+    {
+        $mid = int( ( $hi + $lo ) / 2 );
+        $row = $matrix->[ $mid ];
+
+        if ( $num < $row->[ $col1 ] )
+        {
+            $hi = $mid;
+            next;
+        }
+
+        if ( $num > $row->[ $col2 ] )
+        {
+            $lo = $mid + 1;
+            next;
+        }
+
+        return $mid;
+    }
+
+    return -1;
+}
+
+
+sub list_search
+{
+    # Martin A. Hansen, February 2008.
+
+    # Binary search for a number in a list of numbers. The search
+    # only makes sense if the list is sorted in ascending order.
+
+    my ( $list,   # list of numbers
+         $num,    # number to search for
+       ) = @_;
+
+    # Returns an integer: the index of the number in the list, or
+    # -1 if the number is not found.
+
+    my ( $lo, $hi, $mid, $value );
+
+    # Search window is [ $lo, $hi [ - the upper bound is exclusive.
+
+    ( $lo, $hi ) = ( 0, scalar @{ $list } );
+
+    while ( $lo < $hi )
+    {
+        $mid   = int( ( $hi + $lo ) / 2 );
+        $value = $list->[ $mid ];
+
+        if ( $num < $value )
+        {
+            $hi = $mid;
+            next;
+        }
+
+        if ( $num > $value )
+        {
+            $lo = $mid + 1;
+            next;
+        }
+
+        return $mid;
+    }
+
+    return -1;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DISK SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub matrix_read
+{
+    # Martin A. Hansen, April 2007
+
+    # Reads tabular data from file into a matrix
+    # AoA data structure: one row per line, one cell per field.
+    # Note that the delimiter is used as a regex by split.
+
+    my ( $path,        # full path to file with data
+         $delimiter,   # column delimiter - OPTIONAL (default tab)
+         $comments,    # regex for comment lines to skip - OPTIONAL
+         $fields_ok,   # list of fields (0-based indices) to accept - OPTIONAL
+       ) = @_;
+
+    # returns AoA
+
+    my ( $fh, $line, @fields, @AoA );
+
+    $delimiter ||= "\t";
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $comments and $line =~ /^$comments/;
+
+        @fields = split /$delimiter/, $line;
+
+        # Keep only the accepted fields, in the order given. This used
+        # to splice the listed fields out one by one instead, which
+        # removed exactly the fields that were asked for (and the wrong
+        # ones after the first removal, since the indices shift).
+
+        @fields = @fields[ @{ $fields_ok } ] if $fields_ok;
+
+        push @AoA, [ @fields ];
+    }
+
+    close $fh;
+
+    return wantarray ? @AoA : \@AoA;
+}
+
+
+sub matrix_write
+{
+    # Martin A. Hansen, April 2007
+
+    # Writes a matrix as tabular data, one line per row with the
+    # cells joined by the delimiter. The output goes to the given
+    # file, or is printed to the default output if no path is given.
+
+    my ( $matrix,      # AoA data structure
+         $path,        # full path to output file - OPTIONAL (default STDOUT)
+         $delimiter,   # column delimiter - OPTIONAL (default tab)
+       ) = @_;
+
+    my ( $fh );
+
+    $fh = &Maasha::Common::write_open( $path ) if $path;
+
+    $delimiter ||= "\t";
+
+    if ( $fh ) {
+        print $fh join( $delimiter, @{ $_ } ), "\n" for @{ $matrix };
+    } else {
+        print join( $delimiter, @{ $_ } ), "\n" for @{ $matrix };
+    }
+
+    close $fh if $fh;
+}
+
+
+sub matrix_store
+{
+    # Martin A. Hansen, April 2007.
+
+    # Saves a matrix to a file by handing it over to
+    # Maasha::Common::file_store.
+
+    my ( $path,     # full path to file
+         $matrix,   # data structure
+       ) = @_;
+
+    # returns whatever file_store returns
+
+    return &Maasha::Common::file_store( $path, $matrix );
+}
+
+
+sub matrix_retrive
+{
+    # Martin A. Hansen, April 2007.
+
+    # Loads a matrix from a file using
+    # Maasha::Common::file_retrieve.
+
+    my ( $path,   # full path to file
+       ) = @_;
+
+    # returns AoA
+
+    my $matrix = &Maasha::Common::file_retrieve( $path );
+
+    return $matrix unless wantarray;
+
+    return @{ $matrix };
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+package Maasha::NCBI;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Stuff for interacting with NCBI Entrez
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use LWP::Simple;
+use Maasha::Common;
+
+use vars qw( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_entry
+{
+    # Martin A. Hansen, March 2007.
+
+    # Fetches a record from the NCBI efetch service for a given
+    # database, id and retrieval type, and returns the fetched
+    # content as is.
+
+    my ( $db,     # database <nucleotide|protein>
+         $id,     # genbank id
+         $type,   # retrieval type <gb|gp>
+       ) = @_;
+
+    # returns string
+
+    my ( $url, $content );
+
+    $url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$db&id=$id&rettype=$type";
+
+    $content = get( $url );
+
+    return $content;
+}
+
+
+sub get_seq
+{
+    # Martin A. Hansen, March 2007.
+
+    # Fetches a genbank record from the NCBI efetch service and
+    # returns the sequence found between the ORIGIN line and the
+    # terminating // line, with position numbers and spaces removed.
+
+    my ( $db,     # database <nucleotide|protein>
+         $id,     # genbank id
+         $type,   # retrieval type <gb|gp>
+       ) = @_;
+
+    # returns string, or undef if nothing was fetched or the
+    # record holds no sequence lines.
+
+    my ( $content, @lines, $i, $seq );
+
+    $content = get( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=$db&id=$id&rettype=$type" );
+
+    @lines = defined $content ? split( "\n", $content ) : ();
+
+    # Both scans below are bounded by the number of lines. They used
+    # to run forever on a failed fetch or a record without ORIGIN or
+    # without the terminating //.
+
+    $i = 0;
+
+    while ( $i < @lines and $lines[ $i ] !~ /^ORIGIN/ ) {
+        $i++
+    }
+
+    $i++;
+
+    while ( $i < @lines and $lines[ $i ] !~ /^\/\// )
+    {
+        $lines[ $i ] =~ s/^\s*\d+//;   # strip the leading position number
+
+        $seq .= $lines[ $i ];
+
+        $i++;
+    }
+
+    $seq =~ tr/ //d if defined $seq;
+
+    return $seq;
+}
+
+
+sub soft_parse
+{
+ # Martin A. Hansen, February 2008.
+
+ # !!! NOT USED ANYMORE !!! #
+
+ # Reads in and parses a file in SOFT format.
+ # The whole file is read into memory. The platform table is used
+ # to map IDs to sequences, and for every line in every sample table
+ # a record is created with the keys SAMPLE_TITLE, SEQ and SEQ_NAME.
+ # NOTE(review): a table without its terminating *_table_end line
+ # makes the inner while loops run past the end of @lines and never
+ # stop - confirm input is always well formed.
+
+ my ( $path, # full path to SOFT file
+ ) = @_;
+
+ # Returns a list (arrayref in scalar context) of record hashrefs.
+
+ my ( $fh, @lines, $i, $c, $num, %key_hash, @fields, %id_hash, $id, $seq, $count, $record, @records, $platform_id, $sample_id, $sample_title );
+
+ $fh = &Maasha::Common::read_open( $path );
+
+ @lines = <$fh>;
+
+ close $fh;
+
+ chomp @lines;
+
+ # $i is the current line, $num the running number of the record
+ # within the current sample table.
+
+ $i = 0;
+
+ $num = 1;
+
+ while ( $i < @lines )
+ {
+ if ( $lines[ $i ] =~ /^\^PLATFORM = (.+)/ )
+ {
+ $platform_id = $1;
+ }
+ elsif ( $lines[ $i ] =~ /^!platform_table_begin$/ )
+ {
+ # The line after the begin marker holds the column names;
+ # map each column name to its position.
+
+ @fields = split "\t", $lines[ $i + 1 ];
+
+ for ( $c = 0; $c < @fields; $c++ ) {
+ $key_hash{ $fields[ $c ] } = $c;
+ }
+
+ # Table data start two lines after the begin marker.
+
+ $c = $i + 2;
+
+ while ( $lines[ $c ] !~ /^!platform_table_end$/ )
+ {
+ @fields = split "\t", $lines[ $c ];
+
+ $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
+
+ $c++;
+ }
+
+ # Continue after the table (the end marker itself is skipped
+ # by the increment at the bottom of the loop).
+
+ $i = $c;
+ }
+ elsif ( $lines[ $i ] =~ /^\^SAMPLE = (.+)/ )
+ {
+ $sample_id = $1;
+ }
+ elsif ( $lines[ $i ] =~ /^!Sample_title = (.+)/ )
+ {
+ $sample_title = $1;
+ }
+ elsif ( $lines[ $i ] =~ /^!sample_table_begin/ )
+ {
+ # Column names of the sample table replace those of the
+ # previously parsed table.
+
+ undef %key_hash;
+
+ @fields = split "\t", $lines[ $i + 1 ];
+
+ for ( $c = 0; $c < @fields; $c++ ) {
+ $key_hash{ $fields[ $c ] } = $c;
+ }
+
+ $c = $i + 2;
+
+ while ( $lines[ $c ] !~ /^!sample_table_end$/ )
+ {
+ undef $record;
+
+ @fields = split "\t", $lines[ $c ];
+
+ # Look up the sequence via the ID from the platform table.
+
+ $id = $fields[ $key_hash{ "ID_REF" } ];
+ $seq = $id_hash{ $id };
+ $count = $fields[ $key_hash{ "VALUE" } ];
+
+ # Dots in the sequence are replaced with N.
+
+ $seq =~ tr/./N/;
+
+ $record->{ "SAMPLE_TITLE" } = $sample_title;
+ $record->{ "SEQ" } = $seq;
+ $record->{ "SEQ_NAME" } = join( "_", $platform_id, $sample_id, $num, $count );
+
+ push @records, $record;
+
+ $c++;
+ $num++;
+ }
+
+ $i = $c;
+
+ # Record numbering restarts for the next sample table.
+
+ $num = 1;
+ }
+
+ $i++;
+ }
+
+ return wantarray ? @records : \@records;
+}
+
+
+sub soft_index_file
+{
+    # Martin A. Hansen, June 2008.
+
+    # Creates an index with line numbers of the different entities
+    # in a SOFT file. An entity begins at a line starting with ^ and
+    # ends at the line before the next such line (or at the last
+    # line of the file). Line numbers are 0-based.
+
+    my ( $file,   # file to index
+       ) = @_;
+
+    # Returns a list (arrayref in scalar context) with one
+    # [ entity line, begin line number, end line number ] per entity.
+    # The list is empty if the file holds no entity lines.
+
+    my ( $fh, $line, $i, @index );
+
+    $fh = &Maasha::Common::read_open( $file );
+
+    $i = 0;
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        if ( $line =~ /^\^/ )
+        {
+            # The previous entity, if any, ended on the line before.
+
+            push @{ $index[ -1 ] }, $i - 1 if @index;
+
+            push @index, [ $line, $i ];
+        }
+
+        $i++;
+    }
+
+    # Close the last entity. Guarded, since indexing the last element
+    # of an empty index is fatal.
+
+    push @{ $index[ -1 ] }, $i - 1 if @index;
+
+    close $fh;
+
+    return wantarray ? @index : \@index;
+}
+
+
+sub soft_get_platform
+{
+    # Martin A. Hansen, June 2008.
+
+    # Given a filehandle to a SOFT file, reads lines from the handle
+    # up to and including line number $end (counted from 0 at the
+    # current position of the handle), keeps the lines from $beg and
+    # on, and parses the platform table(s) in these into a hash
+    # mapping the ID column to the SEQUENCE column.
+
+    my ( $fh,    # filehandle
+         $beg,   # line number where platform tables begin
+         $end,   # line number where platform tables end
+       ) = @_;
+
+    # Returns a hash (hashref in scalar context)
+
+    my ( $line, @lines, $i, $c, @fields, %key_hash, %id_hash );
+
+    $i = 0;
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        push @lines, $line if $i >= $beg;
+
+        last if $i == $end;
+
+        $i++;
+    }
+
+    $i = 0;
+
+    while ( $i < @lines )
+    {
+        if ( $lines[ $i ] =~ /^!platform_table_begin$/ )
+        {
+            # The line after the begin marker holds the column names;
+            # map each column name to its position.
+
+            @fields = defined $lines[ $i + 1 ] ? split( "\t", $lines[ $i + 1 ] ) : ();
+
+            for ( $c = 0; $c < @fields; $c++ ) {
+                $key_hash{ $fields[ $c ] } = $c;
+            }
+
+            # Table data start two lines after the begin marker. The
+            # scan is bounded by the number of lines, so a table without
+            # its end marker no longer loops forever.
+
+            $c = $i + 2;
+
+            while ( $c < @lines and $lines[ $c ] !~ /^!platform_table_end$/ )
+            {
+                @fields = split "\t", $lines[ $c ];
+
+                $id_hash{ $fields[ $key_hash{ "ID" } ] } = $fields[ $key_hash{ "SEQUENCE" } ];
+
+                $c++;
+            }
+
+            $i = $c;
+        }
+
+        $i++;
+    }
+
+    return wantarray ? %id_hash : \%id_hash;
+}
+
+
+sub soft_get_sample
+{
+    # Martin A. Hansen, June 2008.
+
+    # Given a filehandle to a SOFT file, reads lines from the handle
+    # up to and including line number $end (counted from 0 at the
+    # current position of the handle), keeps the lines from $beg and
+    # on, and parses the sample table(s) in these. For every table
+    # line a record is created with the keys:
+    #   SAMPLE_TITLE - the sample title
+    #   SEQ          - the platform table entry for the ID_REF column,
+    #                  with dots replaced by N
+    #   SEQ_NAME     - platform id, sample id, running number within
+    #                  the table and the VALUE column joined by _
+
+    my ( $fh,           # filehandle
+         $plat_table,   # hashref with platform tables
+         $beg,          # line number where sample table begin
+         $end,          # line number where sample table end
+       ) = @_;
+
+    # Returns a list (arrayref in scalar context) of record hashrefs
+
+    my ( $line, @lines, $i, $c, $platform_id, @fields, %key_hash, $num, $sample_id, $sample_title, $id, $seq, $count, @records );
+
+    $i = 0;
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        push @lines, $line if $i >= $beg;
+
+        last if $i == $end;
+
+        $i++;
+    }
+
+    $i = 0;
+
+    $num = 1;
+
+    while ( $i < @lines )
+    {
+        if ( $lines[ $i ] =~ /^\^SAMPLE = (.+)/ )
+        {
+            $sample_id = $1;
+        }
+        elsif ( $lines[ $i ] =~ /!Sample_platform_id = (.+)/ )
+        {
+            $platform_id = $1;
+        }
+        elsif ( $lines[ $i ] =~ /^!Sample_title = (.+)/ )
+        {
+            $sample_title = $1;
+        }
+        elsif ( $lines[ $i ] =~ /^!sample_table_begin/ )
+        {
+            # The line after the begin marker holds the column names;
+            # map each column name to its position.
+
+            undef %key_hash;
+
+            @fields = defined $lines[ $i + 1 ] ? split( "\t", $lines[ $i + 1 ] ) : ();
+
+            for ( $c = 0; $c < @fields; $c++ ) {
+                $key_hash{ $fields[ $c ] } = $c;
+            }
+
+            # Table data start two lines after the begin marker. The
+            # scan is bounded by the number of lines, so a table without
+            # its end marker no longer loops forever.
+
+            $c = $i + 2;
+
+            while ( $c < @lines and $lines[ $c ] !~ /^!sample_table_end$/ )
+            {
+                @fields = split "\t", $lines[ $c ];
+
+                $id    = $fields[ $key_hash{ "ID_REF" } ];
+                $seq   = $plat_table->{ $id };   # a copy - the platform table is left untouched
+                $count = $fields[ $key_hash{ "VALUE" } ];
+
+                $seq =~ tr/./N/;
+
+                push @records, {
+                    "SAMPLE_TITLE" => $sample_title,
+                    "SEQ"          => $seq,
+                    "SEQ_NAME"     => join( "_", $platform_id, $sample_id, $num, $count ),
+                };
+
+                $c++;
+                $num++;
+            }
+
+            $i = $c;
+
+            # Record numbering restarts for the next sample table.
+
+            $num = 1;
+        }
+
+        $i++;
+    }
+
+    return wantarray ? @records : \@records;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+package Maasha::Patscan;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for reading and splitting patterns and for parsing scan_for_matches (patscan) results.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Maasha::Common;
+use Maasha::Seq;
+use vars qw ( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub read_patterns
+{
+    # Martin A. Hansen, August 2007.
+
+    # Loads patterns from a file holding one pattern per line.
+    # Empty lines are ignored.
+
+    my ( $path,   # full path to file
+       ) = @_;
+
+    # Returns list (or a reference to it in scalar context).
+
+    my $fh = Maasha::Common::read_open( $path );
+
+    my @file_lines = <$fh>;
+
+    close $fh;
+
+    chomp @file_lines;
+
+    my @patterns = grep { $_ ne "" } @file_lines;
+
+    return wantarray ? @patterns : \@patterns;
+}
+
+
+sub parse_patterns
+{
+    # Martin A. Hansen, November 2007.
+
+    # Splits a comma separated string of patterns without breaking
+    # patterns that contain commas inside brackets, e.g. [1,0,0].
+
+    my ( $str,   # comma separated list of patterns
+       ) = @_;
+
+    # Returns a list (or a reference to it in scalar context).
+
+    my ( $char, $brackets, $current, @patterns );
+
+    $brackets = 0;
+    $current  = "";
+
+    # Walk the string once and cut at commas outside brackets. No
+    # placeholder character is substituted, so every character of a
+    # pattern (including '!') survives untouched.
+
+    foreach $char ( split //, defined $str ? $str : "" )
+    {
+        if ( $char eq "," and $brackets == 0 )
+        {
+            push @patterns, $current;
+
+            $current = "";
+
+            next;
+        }
+
+        if ( $char eq "[" ) {
+            $brackets++;
+        } elsif ( $char eq "]" ) {
+            $brackets--;
+        }
+
+        $current .= $char;
+    }
+
+    push @patterns, $current;
+
+    # Same as a plain split ",": trailing empty fields are dropped.
+
+    pop @patterns while @patterns and $patterns[ -1 ] eq "";
+
+    return wantarray ? @patterns : \@patterns;
+}
+
+
+sub parse_scan_result
+{
+    # Martin A. Hansen, January 2007.
+
+    # Converts one scan_for_matches hit into a match record. The hit
+    # header must look like "<name>:[<beg>,<end>]"; a begin position
+    # larger than the end position denotes a hit on the minus strand.
+
+    my ( $entry,     # FASTA tuple
+         $pattern,   # pattern used in patscan
+       ) = @_;
+
+    # Returns hash (or a reference to it in scalar context). The hash
+    # is empty, and a warning is issued, if the header cannot be parsed.
+
+    my ( $head, $seq ) = @{ $entry };
+
+    my %match;
+
+    my ( $s_id, $first, $last ) = $head =~ /^(.+):\[(\d+),(\d+)\]$/;
+
+    if ( not defined $s_id )
+    {
+        warn qq(WARNING: Could not parse match header->$head<-\n);
+
+        return wantarray ? %match : \%match;
+    }
+
+    my $strand = "+";
+
+    if ( $first > $last )
+    {
+        $strand = "-";
+
+        ( $first, $last ) = ( $last, $first );
+    }
+
+    # scan_for_matches reports 1-based positions; records are 0-based.
+
+    $match{ "REC_TYPE" }  = "PATSCAN";
+    $match{ "PATTERN" }   = $pattern;
+    $match{ "Q_ID" }      = $pattern;
+    $match{ "S_ID" }      = $s_id;
+    $match{ "S_BEG" }     = $first - 1;
+    $match{ "S_END" }     = $last - 1;
+    $match{ "MATCH_LEN" } = $last - $first + 1;
+    $match{ "SCORE" }     = 100;
+    $match{ "STRAND" }    = $strand;
+    $match{ "HIT" }       = $seq;
+
+    return wantarray ? %match : \%match;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::Plot;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines to plot stuff with Gnuplot and SVG.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use SVG;
+use IPC::Open2;
+use Time::HiRes qw( gettimeofday );
+use Maasha::Common;
+use Maasha::Calc;
+use vars qw ( @ISA @EXPORT );
+
+use constant {
+ WIDTH => 800,
+ HEIGHT => 600,
+ MARGIN => 40,
+};
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> LINEPLOTS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub lineplot_simple
+{
+    # Martin A. Hansen, January 2008.
+
+    # Plots a simple lineplot using Gnuplot. The data table is written
+    # to a temporary file which Gnuplot reads; the file is removed
+    # again once Gnuplot has finished.
+
+    my ( $data,      # data table - each column will be plotted as one line.
+         $options,   # options hash
+         $tmp_dir,   # temporary directory
+       ) = @_;
+
+    # Returns list with the lines printed by Gnuplot (or a reference
+    # to it in scalar context).
+
+    $tmp_dir ||= $ENV{ 'TMP_DIR' };
+
+    my $tmp_file = "$tmp_dir/lineplot_simple.tab";
+
+    my $fh_tab = Maasha::Common::write_open( $tmp_file );
+
+    foreach my $row ( @{ $data } ) {
+        print $fh_tab join( "\t", @{ $row } ), "\n";
+    }
+
+    close $fh_tab;
+
+    $options->{ "terminal" } ||= "dumb";
+
+    my ( $gp_out, $gp_in );
+
+    my $pid = open2( $gp_out, $gp_in, "gnuplot" );
+
+    my @settings = ( "set terminal $options->{ 'terminal' }" );
+
+    foreach my $label ( qw( title xlabel ylabel ) ) {
+        push @settings, qq(set $label "$options->{ $label }") if $options->{ $label };
+    }
+
+    push @settings, "set grid" if $options->{ "terminal" } ne "dumb";
+    push @settings, "set autoscale";
+    push @settings, "unset key";
+    push @settings, "set xtics border in scale 0 nomirror rotate by 90 offset character 0, 0, 0";
+
+    print $gp_in "$_\n" foreach @settings;
+
+    # One plot clause per column of the first data row.
+
+    my @plot_cmd = map { qq("$tmp_file" using $_ with lines ls 1) } 1 .. scalar @{ $data->[ 0 ] };
+
+    print $gp_in "plot " . join( ", ", @plot_cmd ) . "\n";
+
+    close $gp_in;
+
+    my @lines = <$gp_out>;
+
+    chomp @lines;
+
+    close $gp_out;
+
+    waitpid $pid, 0;
+
+    unlink $tmp_file;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> HISTOGRAMS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub histogram_simple
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plots a simple histogram using Gnuplot. The data are fed inline
+    # to Gnuplot: the first tuple element labels the x tic and the
+    # second is the bar height.
+
+    my ( $data,      # list of [ xlabel, data value ] tuples
+         $options,   # options hash
+       ) = @_;
+
+    # Returns list with the lines printed by Gnuplot (or a reference
+    # to it in scalar context).
+
+    $options->{ "terminal" } ||= "dumb";
+
+    my ( $gp_out, $gp_in );
+
+    my $pid = open2( $gp_out, $gp_in, "gnuplot" );
+
+    my @settings = ( "set terminal $options->{ 'terminal' }" );
+
+    push @settings, qq(set title "$options->{ 'title' }")   if $options->{ "title" };
+    push @settings, qq(set xlabel "$options->{ 'xlabel' }") if $options->{ "xlabel" };
+    push @settings, qq(set ylabel "$options->{ 'ylabel' }") if $options->{ "ylabel" };
+
+    push @settings, (
+        "set autoscale",
+        "unset key",
+        "set style fill solid",
+        "set style histogram title offset character 0, 0, 0",
+        "set style data histograms",
+        "set xtics border in scale 0 nomirror rotate by 90 offset character 0, 0, 0",
+        "plot '-' using 2:xticlabels(1)",
+    );
+
+    print $gp_in "$_\n" foreach @settings;
+
+    foreach my $tuple ( @{ $data } ) {
+        print $gp_in join( "\t", "\"$tuple->[ 0 ]\"", $tuple->[ 1 ] ), "\n";
+    }
+
+    close $gp_in;
+
+    my @lines = <$gp_out>;
+
+    chomp @lines;
+
+    close $gp_out;
+
+    waitpid $pid, 0;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+sub histogram_lendist
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plots a length distribution histogram using Gnuplot. The spacing
+    # of the x tics is chosen from the x value of the last tuple.
+
+    my ( $data,      # list of [ xlabel, data value ] tuples
+         $options,   # options hash
+       ) = @_;
+
+    # Returns list with the lines printed by Gnuplot (or a reference
+    # to it in scalar context).
+
+    my ( $pid, $fh_in, $fh_out, $cmd, $i, $line, @lines, $max, $step, $xtic_space, @xtic_table );
+
+    $options->{ "terminal" } ||= "dumb";
+
+    # [ upper limit of the last x value, tic spacing ] in ascending order.
+
+    @xtic_table = (
+        [     10,    1 ],
+        [    100,    5 ],
+        [    250,   10 ],
+        [    500,   20 ],
+        [   1000,   50 ],
+        [   2500,  100 ],
+        [   5000,  250 ],
+        [  10000,  500 ],
+        [  50000, 1000 ],
+        [ 100000, 5000 ],
+    );
+
+    # An empty data list is treated as a maximum of 0 instead of dying
+    # on the negative subscript.
+
+    $max = @{ $data } ? $data->[ -1 ]->[ 0 ] : 0;
+
+    foreach $step ( @xtic_table )
+    {
+        if ( $max <= $step->[ 0 ] )
+        {
+            $xtic_space = $step->[ 1 ];
+
+            last;
+        }
+    }
+
+    # Beyond the table use half the largest power of ten not exceeding
+    # the maximum, which gives between 2 and 20 tics. Previously the
+    # spacing was left undefined here and Gnuplot was handed the
+    # invalid command "set xtics 0,".
+
+    $xtic_space = 10 ** ( length( int $max ) - 1 ) / 2 if not defined $xtic_space;
+
+    $cmd = "gnuplot";
+
+    $pid = open2( $fh_out, $fh_in, $cmd );
+
+    print $fh_in "set terminal $options->{ 'terminal' }\n";
+    print $fh_in "set title \"$options->{ 'title' }\"\n"   if $options->{ "title" };
+    print $fh_in "set xlabel \"$options->{ 'xlabel' }\"\n" if $options->{ "xlabel" };
+    print $fh_in "set ylabel \"$options->{ 'ylabel' }\"\n" if $options->{ "ylabel" };
+    print $fh_in "set autoscale\n";
+    print $fh_in "unset key\n";
+    print $fh_in "set style fill solid\n";
+    print $fh_in "set style histogram clustered gap 1 title offset character 0, 0, 0\n";
+    print $fh_in "set style data histograms\n";
+    print $fh_in "set xtics 0,$xtic_space border out nomirror\n";
+
+    print $fh_in "plot '-' using 1\n";
+
+    for ( $i = 0; $i < @{ $data }; $i++ )
+    {
+        # NOTE(review): this overwrites the caller's x label although the
+        # label is never sent to Gnuplot - kept for backward
+        # compatibility, confirm whether any caller relies on it.
+
+        $data->[ $i ]->[ 0 ] = "." if $data->[ $i ]->[ 0 ] % 10 != 0;
+
+        print $fh_in join( "\t", $data->[ $i ]->[ 1 ] ), "\n";
+    }
+
+    close $fh_in;
+
+    while ( $line = <$fh_out> )
+    {
+        chomp $line;
+
+        push @lines, $line;
+    }
+
+    close $fh_out;
+
+    waitpid $pid, 0;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+sub histogram_chrdist
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plots a histogram using Gnuplot with one labelled bar per tuple.
+
+    my ( $data,      # list of [ xlabel, data value ] tuples
+         $options,   # options hash
+       ) = @_;
+
+    # Returns list with the lines printed by Gnuplot (or a reference
+    # to it in scalar context).
+
+    $options->{ "terminal" } ||= "dumb";
+
+    my ( $from_gnuplot, $to_gnuplot );
+
+    my $pid = open2( $from_gnuplot, $to_gnuplot, "gnuplot" );
+
+    print $to_gnuplot "set terminal $options->{ 'terminal' }\n";
+
+    # Title and axis labels are only set when given.
+
+    foreach my $label ( qw( title xlabel ylabel ) )
+    {
+        next if not $options->{ $label };
+
+        print $to_gnuplot qq(set $label "$options->{ $label }"\n);
+    }
+
+    print $to_gnuplot "$_\n" foreach (
+        "set autoscale",
+        "unset key",
+        "set style fill solid",
+        "set style histogram title offset character 0, 0, 0",
+        "set style data histograms",
+        "set xtics border in scale 0 nomirror rotate by 90 offset character 0, 0, 0",
+        "plot '-' using 2:xticlabels(1)",
+    );
+
+    foreach my $tuple ( @{ $data } ) {
+        print $to_gnuplot join( "\t", "\"$tuple->[ 0 ]\"", $tuple->[ 1 ] ), "\n";
+    }
+
+    close $to_gnuplot;
+
+    my @lines = <$from_gnuplot>;
+
+    chomp @lines;
+
+    close $from_gnuplot;
+
+    waitpid $pid, 0;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DOTPLOT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub dotplot_matches
+{
+    # Martin A. Hansen, August 2007.
+
+    # Generates a dotplot from a list of matches using Gnuplot.
+    # Forward and reverse matches are written to two temporary files
+    # (one line segment per match) which Gnuplot reads; both files are
+    # removed again once Gnuplot has finished.
+
+    my ( $matches,   # list of hashrefs.
+         $options,   # options hash
+         $tmp_dir,   # temporary directory
+       ) = @_;
+
+    # Returns list with the lines printed by Gnuplot (or a reference
+    # to it in scalar context).
+
+    $tmp_dir ||= $ENV{ 'TMP_DIR' };
+
+    my $forward_file  = "$tmp_dir/match_f.tab";
+    my $backward_file = "$tmp_dir/match_r.tab";
+
+    my $fh_forward  = Maasha::Common::write_open( $forward_file );
+    my $fh_backward = Maasha::Common::write_open( $backward_file );
+
+    my $q_max = 0;
+    my $s_max = 0;
+
+    foreach my $match ( @{ $matches } )
+    {
+        my ( $q_beg, $q_end, $s_beg, $s_end ) = @{ $match }{ qw( Q_BEG Q_END S_BEG S_END ) };
+
+        # Forward matches run S_BEG -> S_END, all others S_END -> S_BEG.
+
+        my ( $fh, $s_first, $s_last ) = $match->{ "DIR" } =~ /^f/
+                                      ? ( $fh_forward, $s_beg, $s_end )
+                                      : ( $fh_backward, $s_end, $s_beg );
+
+        # Positions are shifted by one for plotting.
+
+        print $fh join( "\t", $q_beg + 1, $s_first + 1 ), "\n";
+        print $fh join( "\t", $q_end + 1, $s_last + 1 ), "\n";
+        print $fh "\n\n";
+
+        $q_max = $q_end if $q_end > $q_max;
+        $s_max = $s_end if $s_end > $s_max;
+    }
+
+    $q_max += 1;
+    $s_max += 1;
+
+    close $fh_forward;
+    close $fh_backward;
+
+    $options->{ "terminal" } ||= "dumb";
+
+    my ( $gp_out, $gp_in );
+
+    my $pid = open2( $gp_out, $gp_in, "gnuplot" );
+
+    my @settings = (
+        "set terminal $options->{ 'terminal' }",
+        "set xrange [1:$q_max]",
+        "set yrange [1:$s_max]",
+    );
+
+    push @settings, qq(set title "$options->{ 'title' }")   if $options->{ "title" };
+    push @settings, qq(set xlabel "$options->{ 'xlabel' }") if $options->{ "xlabel" };
+    push @settings, qq(set ylabel "$options->{ 'ylabel' }") if $options->{ "ylabel" };
+
+    push @settings, "unset key";
+
+    # Coloured line styles are only defined for non-dumb terminals.
+
+    if ( $options->{ "terminal" } ne "dumb" )
+    {
+        push @settings, qq(set style line 1 linetype 1 linecolor rgb "green" linewidth 2 pointtype 6 pointsize default);
+        push @settings, qq(set style line 2 linetype 1 linecolor rgb "red" linewidth 2 pointtype 6 pointsize default);
+    }
+
+    push @settings, "set xtics border out", "set ytics border out", "set grid";
+
+    # The direction option selects which of the two files are plotted.
+
+    if ( $options->{ "direction" } =~ /^b/ ) {
+        push @settings, qq(plot "$forward_file" with lines ls 1, "$backward_file" with lines ls 2);
+    } elsif ( $options->{ "direction" } =~ /^f/ ) {
+        push @settings, qq(plot "$forward_file" with lines ls 1);
+    } elsif ( $options->{ "direction" } =~ /^r/ ) {
+        push @settings, qq(plot "$backward_file" with lines ls 2);
+    }
+
+    print $gp_in "$_\n" foreach @settings;
+
+    close $gp_in;
+
+    my @lines = <$gp_out>;
+
+    chomp @lines;
+
+    close $gp_out;
+
+    waitpid $pid, 0;
+
+    unlink $forward_file;
+    unlink $backward_file;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> KARYOGRAM <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub karyogram
+{
+    # Martin A. Hansen, August 2007.
+
+    # Plot hits on a karyogram for a given genome. The genome option
+    # "human" selects the human cytoband file; any other value selects
+    # the mouse cytoband file.
+
+    my ( $data,      # list of [ chr, beg, end ] triples
+         $options,   # hashref with options
+       ) = @_;
+
+    # Returns string with the SVG document.
+
+    my ( $file_name, $karyo_file, $inst_file, $svg, $karyo );
+
+    if ( $options->{ "genome" } eq "human" ) {
+        $file_name = "human_cytobands.txt";
+    } else {
+        $file_name = "mouse_cytobands.txt";
+    }
+
+    # Historical location, kept as fallback so existing setups keep
+    # working unchanged.
+
+    $karyo_file = "/Users/m.hansen/maasha/perl_scripts/biotools/karyo_data/$file_name";
+
+    # Prefer the copy below the installation directory named by the
+    # INST_DIR environment variable when that file exists, so the
+    # routine also works outside the original author's home directory.
+
+    if ( defined $ENV{ "INST_DIR" } )
+    {
+        $inst_file = "$ENV{ 'INST_DIR' }/perl_scripts/biotools/karyo_data/$file_name";
+
+        $karyo_file = $inst_file if -e $inst_file;
+    }
+
+    $karyo = &parse_karyo_data( $karyo_file );
+
+    $svg = &init_svg();
+
+    &chromosome_layout( $svg, $karyo, $data );
+
+    return $svg->xmlify;
+}
+
+
+sub parse_karyo_data
+{
+    # Reads a tab separated cytoband file and groups the bands per
+    # chromosome. Lines starting with '#' are skipped. Each remaining
+    # line is split into chromosome, begin, end, band name and band
+    # type, and the band type is translated to a colour name.
+
+    # color: /etc/X11/rgb.txt
+
+    my ( $file,   # path to cytoband file
+       ) = @_;
+
+    # Returns hash mapping chromosome to a list of
+    # [ name, beg, end, color ] (or a reference to it in scalar context).
+
+    # Band types missing from this table yield an undefined colour.
+
+    my %color_hash = (
+        acen    => "DarkGrey",
+        gneg    => "white",
+        gpos100 => "black",
+        gpos75  => "DarkGrey",
+        gpos66  => "DarkGrey",
+        gpos50  => "grey",
+        gpos33  => "LightGrey",
+        gpos25  => "LightGrey",
+        gvar    => "LightGrey",
+        stalk   => "DarkGrey",
+    );
+
+    my %features;
+
+    my $fh = Maasha::Common::read_open( $file );
+
+    while ( my $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $line =~ /^#/;
+
+        my ( $chr, $beg, $end, $name, $band_type ) = split "\t", $line;
+
+        # The per chromosome list springs into existence on first push.
+
+        push @{ $features{ $chr } }, [ $name, $beg, $end, $color_hash{ $band_type } ];
+    }
+
+    close $fh;
+
+    return wantarray ? %features : \%features;
+}
+
+
+sub color_intensity
+{
+    # Martin A. Hansen, September 2007.
+
+    # Converts a gray scale intensity in percent to a "#rrggbb" colour
+    # string with identical red, green and blue components.
+
+    my ( $percent,   # color intensity
+       ) = @_;
+
+    # Returns string
+
+    my ( $num, $hex );
+
+    $num = int( $percent * 256 / 100 ) - 1;
+
+    # Keep the component within one byte: 0 percent used to give -1,
+    # which "%x" renders as ffffffffffffffff.
+
+    $num = 0   if $num < 0;
+    $num = 255 if $num > 255;
+
+    # Always two hex digits per component; a single digit would yield
+    # a three digit colour (e.g. "#bbb") which denotes another gray.
+
+    $hex = sprintf "%02x", $num;
+
+    return "#$hex$hex$hex";
+}
+
+
+sub init_svg
+{
+    # Martin A. Hansen, September 2005.
+
+    # Creates the SVG image used for karyograms: WIDTH x HEIGHT with
+    # black strokes of width 1 and the Helvetica font.
+
+    # returns an image object
+
+    my %style = (
+        'stroke-width' => 1,
+        stroke         => "black",
+        font           => 'Helvetica',
+    );
+
+    return SVG->new(
+        width  => WIDTH,
+        height => HEIGHT,
+        style  => \%style,
+    );
+}
+
+
+sub chromosome_layout
+{
+ # Martin A. Hansen, January 2004 - August 2007.
+
+ # Plots all chromosomes side by side in a single SVG group with
+ # the id "chr_layout". The coordinates in both $karyo_list and
+ # $feat_list are scaled IN PLACE, so the caller's data structures
+ # are modified.
+
+ my ( $svg, # image object
+ $karyo_list, # hashref with karyo data
+ $feat_list, # hashref with features
+ ) = @_;
+
+ # NOTE(review): the original comment claims that an image object is
+ # returned, but there is no explicit return statement - the drawing
+ # is added to $svg and callers should not rely on the return value.
+
+ my ( $layout_obj, $i, $x, $y, $max, $factor, $chr_len, $chr_width, $chr_cent, $chr, $feat, $karyo, @list, $A, $B );
+
+ $layout_obj = $svg->group(
+ id => "chr_layout",
+ );
+
+ # The end of the last band of chr1 is used as the longest chromosome
+ # and is scaled to half the image height.
+ $max = $karyo_list->{ "chr1" }->[ -1 ]->[ 2 ];
+ $factor = ( HEIGHT / 2 ) / $max;
+ $chr_width = ( HEIGHT / 4 ) / 13;
+
+ # Scale begin and end of every karyo band ( [ name, beg, end, color ] ).
+ foreach $karyo ( keys %{ $karyo_list } ) {
+ map { $_->[ 1 ] *= $factor; $_->[ 2 ] *= $factor } @{ $karyo_list->{ $karyo } };
+ }
+
+ # Scale the first two elements of every feature.
+ foreach $feat ( keys %{ $feat_list } ) {
+ map { $_->[ 0 ] *= $factor; $_->[ 1 ] *= $factor } @{ $feat_list->{ $feat } };
+ }
+
+ # Numeric sort on the chromosome name with the "chr" prefix removed.
+ @list = sort { $A = $a; $B = $b; $A =~ s/chr//; $B =~ s/chr//; $A <=> $B } keys %{ $karyo_list };
+
+ # The first two sorted names are dropped and chrX and chrY appended -
+ # presumably X and Y sort first because they compare as 0; this
+ # assumes both are present in the karyo data - TODO confirm.
+ splice @list, 0, 2;
+ push @list, "chrX", "chrY";
+
+ $i = 0;
+
+ while ( $i < @list )
+ {
+ $chr = $list[ $i ];
+ $chr_len = $karyo_list->{ $chr }->[ -1 ]->[ 2 ];
+ $chr_cent = &find_cent( $karyo_list->{ $list[ $i ] } );
+
+ # Chromosomes are aligned at their lower end on the horizontal mid
+ # line and spread evenly across the image width.
+ $y = HEIGHT / 2 - $chr_len;
+ $x = ( WIDTH / ( @list + 2 ) ) * ( $i + 1 );
+
+ &draw_chr( $layout_obj, $x, $y, $chr_len, $chr_width, $chr_cent, $chr, $karyo_list, $feat_list );
+
+ $i++;
+ }
+}
+
+
+sub find_cent
+{
+    # Martin A. Hansen, December 2003.
+
+    # Finds the centromeric region in the karyo data: the first two
+    # bands holding the value "DarkGrey" are taken, their begin and end
+    # positions are sorted, and the mean of the two middle positions
+    # is the centromere position.
+
+    my ( $list ) = @_;
+
+    # Returns number.
+
+    my @dark_bands = grep { my $band = $_; scalar grep { /^DarkGrey$/ } @{ $band } } @{ $list };
+
+    my ( $first, $second ) = @dark_bands;
+
+    my @positions = sort { $a <=> $b } (
+        $first->[ 1 ],
+        $first->[ 2 ],
+        $second->[ 1 ],
+        $second->[ 2 ],
+    );
+
+    return ( $positions[ 1 ] + $positions[ 2 ] ) / 2;
+}
+
+
+sub draw_chr
+{
+    # Martin A. Hansen, December 2003.
+
+    # Draws a whole chromosome as two rounded rectangles (the arms
+    # above and below the centromere position) inside a group named
+    # after the chromosome. Features are drawn first, the karyo bands
+    # are clipped to the arm outlines, and the chromosome number is
+    # added last.
+
+    my ( $svg,          # image object
+         $x,            # x position
+         $y,            # y position
+         $chr_len,      # length of chromosome
+         $chr_width,    # width of chromosome
+         $chr_cent,     # position of centromeric region
+         $chr,          # chromosome
+         $karyo_list,   # hashref with karyo data
+         $feat_list,    # hashref with features
+       ) = @_;
+
+    # Geometry of the arm above and the arm below the centromere.
+
+    my @upper_arm = (
+        x      => sprintf( "%.3f", $x ),
+        y      => sprintf( "%.3f", $y ),
+        width  => sprintf( "%.3f", $chr_width ),
+        height => sprintf( "%.3f", $chr_cent ),
+    );
+
+    my @lower_arm = (
+        x      => sprintf( "%.3f", $x ),
+        y      => sprintf( "%.3f", $y + $chr_cent ),
+        width  => sprintf( "%.3f", $chr_width ),
+        height => sprintf( "%.3f", $chr_len - $chr_cent ),
+    );
+
+    my @rounding = ( rx => 10, ry => 10 );
+
+    my $chr_obj = $svg->group( id => $chr );
+
+    draw_chr_feat( $chr_obj, $x, $y, $chr_width, $feat_list->{ $chr } ) if exists $feat_list->{ $chr };
+
+    # Both arms together form the clip path for the karyo bands.
+
+    my $clip_obj = $chr_obj->clipPath( id => $chr . "_clipPath" );
+
+    $clip_obj->rectangle( @upper_arm, @rounding );
+    $clip_obj->rectangle( @lower_arm, @rounding );
+
+    my $gr_obj = $chr_obj->group( "clip-path" => "url(#$chr" . "_clipPath)" );
+
+    draw_karyo_data( $gr_obj, $x, $y, $chr_width, $karyo_list->{ $chr } ) if exists $karyo_list->{ $chr };
+
+    # Unfilled outlines on top of the bands.
+
+    $gr_obj->rectangle( @upper_arm, fill => 'none', @rounding );
+    $gr_obj->rectangle( @lower_arm, fill => 'none', @rounding );
+
+    draw_chr_num( $chr_obj, $x, $y, $chr_len, $chr_width, $chr );
+}
+
+
+sub draw_chr_num
+{
+    # Martin A. Hansen, December 2003.
+
+    # Draws the chromosome number (the name without the first "chr")
+    # centred 15 units below the chromosome, assuming 8 units of width
+    # per character.
+
+    my ( $svg,         # image object
+         $x,           # x position
+         $y,           # y position
+         $chr_len,     # length of chromosome
+         $chr_width,   # width of chromosome
+         $chr,         # chromosome number
+       ) = @_;
+
+    ( my $chr_num = $chr ) =~ s/chr//;
+
+    my $half_word_width = ( length( $chr_num ) * 8 ) / 2;
+
+    my $text_x = sprintf( "%.3f", $x + ( $chr_width / 2 ) - $half_word_width );
+    my $text_y = sprintf( "%.3f", $y + $chr_len + 15 );
+
+    $svg->text( x => $text_x, y => $text_y )->cdata( $chr_num );
+}
+
+
+sub draw_karyo_data
+{
+    # Martin A. Hansen, February 2004.
+
+    # Plots the karyo bands of one chromosome as filled rectangles
+    # without outline.
+
+    my ( $svg,         # image object
+         $x,           # x position of chromosome
+         $y,           # y position of chromosome
+         $chr_width,   # width of chromosome
+         $list,        # list of [ label, beg, end, color ]
+       ) = @_;
+
+    foreach my $band ( @{ $list } )
+    {
+        my ( $label, $band_beg, $band_end, $color ) = @{ $band };
+
+        $svg->rectangle(
+            x              => sprintf( "%.3f", $x ),
+            y              => sprintf( "%.3f", $y + $band_beg ),
+            width          => sprintf( "%.3f", $chr_width ),
+            height         => sprintf( "%.3f", $band_end - $band_beg ),
+            'stroke-width' => 0,
+            fill           => $color,
+        );
+    }
+}
+
+
+sub draw_chr_feat
+{
+    # Martin A. Hansen, February 2004.
+
+    # Plots chromosome features as coloured rectangles starting at the
+    # middle of the chromosome and sticking out 5 units on its right
+    # side. Coordinates are rounded to whole units. Features lower
+    # than one unit are drawn with height 1, and only the first such
+    # feature at a given position is drawn.
+
+    my ( $svg,         # image object
+         $x,           # x position of chromosome
+         $y,           # y position of chromosome
+         $chr_width,   # width of chromosome
+         $list,        # list of [ beg, end, color ]
+       ) = @_;
+
+    my %seen;
+
+    foreach my $feat ( @{ $list } )
+    {
+        my ( $feat_beg, $feat_end, $color ) = @{ $feat };
+
+        my $x1     = sprintf( "%.0f", $x + ( $chr_width / 2 ) );
+        my $y1     = sprintf( "%.0f", $y + $feat_beg );
+        my $width  = sprintf( "%.0f", ( $chr_width / 2 ) + 5 );
+        my $height = sprintf( "%.0f", $feat_end - $feat_beg );
+
+        if ( $height < 1 )
+        {
+            next if exists $seen{ $x1 . $y1 };
+
+            $seen{ $x1 . $y1 } = 1;
+
+            $height = 1;
+        }
+
+        $svg->rectangle(
+            x      => $x1,
+            y      => $y1,
+            width  => $width,
+            height => $height,
+            stroke => $color,
+            fill   => $color,
+        );
+    }
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SEQUENCE LOGO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub seq_logo
+{
+    # Martin A. Hansen, August 2007.
+
+    # Calculates and renders a sequence logo in SVG format. The
+    # sequence type is guessed from the first entry; a type starting
+    # with "p" gets a maximum of 4 bits, all other types 2 bits.
+
+    my ( $entries,   # aligned sequence entries - list of tuples
+       ) = @_;
+
+    # Returns string.
+
+    my $type = Maasha::Seq::seq_guess_type( $entries->[ 0 ]->[ 1 ] );
+
+    my $bit_max = $type =~ /^p/i ? 4 : 2;
+
+    my $logo_data = Maasha::Seq::seqlogo_calc( $bit_max, $entries );
+
+    my $svg = Maasha::Plot::svg_init();
+
+    svg_draw_logo( $svg, $logo_data, $bit_max, $type );
+    svg_draw_logo_scale( $svg, $bit_max );
+
+    return $svg->xmlify;
+}
+
+
+sub svg_init
+{
+    # Martin A. Hansen, October 2005.
+
+    # Initializes the SVG object used for sequence logos (normal
+    # weight Courier New, font size 10), which is returned.
+
+    my %style = (
+        'font-weight' => 'normal',
+        'font-family' => 'Courier New',
+        'font-size'   => 10,
+    );
+
+    return SVG->new( style => \%style );
+}
+
+
+sub svg_draw_logo
+{
+    # Martin A. Hansen, January 2007.
+
+    # Renders a sequence logo in SVG using a given data structure with
+    # logo details: one list per alignment position holding
+    # [ character, height in bits ] elements. Characters are stacked
+    # upwards from y = 30, each stretched vertically to its height,
+    # and positions are placed 7 units apart.
+
+    my ( $svg,         # SVG object,
+         $logo_data,   # data structure
+         $bit_max,     # maximum bit height
+         $type,        # sequence type
+         $nocolor,     # render black and white - OPTIONAL
+       ) = @_;
+
+    my $column_x = 0;
+
+    foreach my $column ( @{ $logo_data } )
+    {
+        my $stack_y = 30;
+
+        foreach my $letter ( @{ $column } )
+        {
+            my ( $char, $height_bit ) = @{ $letter };
+
+            # 30 units correspond to the maximum bit height.
+
+            my $height_px = $height_bit * ( 30 / $bit_max );
+
+            my $block = $svg->group( transform => "translate($column_x,$stack_y)" );
+
+            my $stretch = $height_px / 7;
+
+            my $color;
+
+            if ( $nocolor ) {
+                $color = "black";
+            } elsif ( $type eq "dna" or $type eq "rna" ) {
+                $color = Maasha::Seq::color_nuc( $char );
+            } else {
+                $color = Maasha::Seq::color_pep( $char );
+            }
+
+            $block->text(
+                transform => "scale(1,$stretch)",
+                x         => 0,
+                y         => 0,
+                style     => {
+                    'font-weight' => 'bold',
+                    fill          => Maasha::Seq::color_palette( $color ),
+                }
+            )->cdata( $char );
+
+            $stack_y -= $height_px;
+        }
+
+        $column_x += 7;
+    }
+}
+
+
+sub svg_draw_logo_scale
+{
+    # Martin A. Hansen, January 2007.
+
+    # Draws the bit scale for the sequence logo: a rotated "bits"
+    # label, a vertical axis 30 units high, and one labelled tick per
+    # whole bit with the maximum at the top.
+
+    my ( $svg,       # SVG object,
+         $bit_max,   # maximum bit height
+       ) = @_;
+
+    my %no_stroke = ( stroke => 'none' );
+
+    my $scale = $svg->group(
+        transform => "translate(-10)",
+        style     => {
+            stroke      => 'black',
+            'font-size' => '8px',
+        }
+    );
+
+    $svg->text(
+        transform => "rotate(-90)",
+        x         => -26,
+        y         => -30,
+        style     => { %no_stroke },
+    )->cdata( "bits" );
+
+    $scale->line( x1 => 0, x2 => 0, y1 => 0, y2 => 30 );
+
+    foreach my $bit ( 0 .. $bit_max )
+    {
+        my $tick_y = ( 30 / $bit_max ) * $bit;
+
+        $scale->line( x1 => -5, x2 => 0, y1 => $tick_y, y2 => $tick_y );
+
+        $scale->text(
+            x     => -13,
+            y     => $tick_y + 2,
+            style => { %no_stroke },
+        )->cdata( $bit_max - $bit );
+    }
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+These are modules written by me (Martin A. Hansen aka Maasha).
+
+You are welcome to modify the code here, but do leave a note in
+the subroutines you change like this:
+
+ sub some_subroutine
+ {
+ # Martin A. Hansen, Jan 2008.
+
+ # Changed by <you>, July 2008. (fixed minor bug)
+
+ ...
+ }
+
+You may also add new subroutines, but you should strongly consider
+adding your own Perl modules subdirectory. For more information, see:
+
+ ../biopieces/code_perl/README
+
+
+
+Martin A. Hansen, July 2008
--- /dev/null
+package Maasha::SQL;
+
+# Copyright (C) 2006 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for manipulation of MySQL via the DBI module.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use warnings;
+
+use DBI;
+use Data::Dumper;
+
+use Maasha::Common;
+
+use vars qw( @ISA @EXPORT );
+use Exporter;
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub create_database
+{
+    # Creates a MySQL database by running the mysqladmin command line
+    # tool. Dies if mysqladmin does not exit successfully.
+
+    my ( $database,   # MySQL database
+         $user,       # MySQL username
+         $password,   # MySQL password
+       ) = @_;
+
+    # The list form of system() bypasses the shell, so database names,
+    # user names and passwords holding spaces or shell metacharacters
+    # reach mysqladmin verbatim instead of being interpreted.
+
+    system( "mysqladmin", "create", $database, "--user=$user", "--password=$password" ) == 0 or
+        die qq(ERROR: Could not create database "$database"!\n);
+
+    return;
+}
+
+
+sub database_exists
+{
+    # Martin A. Hansen, May 2008.
+
+    # Checks if a given database exists. Returns 1 if so,
+    # otherwise 0. The name is compared case insensitively.
+
+    my ( $database,   # MySQL database
+         $user,       # MySQL username
+         $pass,       # MySQL password
+       ) = @_;
+
+    # Return boolean.
+
+    my ( @databases );
+
+    @databases = &list_databases( $user, $pass );
+
+    # \Q...\E makes the name match literally; unquoted, characters
+    # such as '.' or '+' in a name would act as regex operators.
+
+    if ( grep { /^\Q$database\E$/i } @databases ) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+
+sub list_databases
+{
+    # Martin A. Hansen, May 2008.
+
+    # Returns a list of databases available, as reported by the
+    # mysqlshow command line tool. The first three output lines and
+    # the last one are discarded, and the "| name |" framing is
+    # stripped from the remaining lines.
+
+    my ( $user,   # MySQL username
+         $pass,   # MySQL password
+       ) = @_;
+
+    # Returns a list (or a reference to it in scalar context).
+
+    my @rows = Maasha::Common::run_and_return( "mysqlshow", "--user=$user --password=$pass" );
+
+    splice @rows, 0, 3;
+
+    pop @rows;
+
+    foreach my $row ( @rows ) {
+        $row =~ s/^\|\s+([^\s]+)\s+\|$/$1/;
+    }
+
+    return wantarray ? @rows : \@rows;
+}
+
+
+sub request
+{
+    # Prepares and executes a SQL statement that yields no result
+    # set. On failure the database handle is disconnected and the
+    # DBI error is raised.
+
+    my ( $dbh,   # database handle
+         $sql,   # SQL string
+       ) = @_;
+
+    my $sth = $dbh->prepare( $sql );
+
+    if ( not $sth )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "SQL PREPARE ERROR" );
+    }
+
+    my $executed = $sth->execute;
+
+    if ( not $executed )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "SQL EXECUTE ERROR" );
+    }
+
+    return;
+}
+
+
+sub query_hash
+{
+    # Niels Larsen, April 2003.
+
+    # Runs the given SQL query and returns the result as a hash keyed
+    # on the values of the given key column. On failure the database
+    # handle is disconnected and the DBI error is raised.
+
+    my ( $dbh,   # Database handle
+         $sql,   # SQL string
+         $key,   # Key string, like "id", "name", ..
+       ) = @_;
+
+    # Returns a hash (or a reference to it in scalar context).
+
+    my $sth = $dbh->prepare( $sql );
+
+    if ( not $sth )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "SQL PREPARE ERROR" );
+    }
+
+    if ( not $sth->execute )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "SQL EXECUTE ERROR" );
+    }
+
+    my $hash = $sth->fetchall_hashref( $key );
+
+    if ( not $hash )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "DATABASE RETRIEVE ERROR" );
+    }
+
+    return wantarray ? %{ $hash } : $hash;
+}
+
+
+sub query_array
+{
+    # Niels Larsen, April 2003.
+
+    # Runs the given SQL query and returns the result as a table
+    # (list of rows). On failure the database handle is disconnected
+    # and the DBI error is raised.
+
+    my ( $dbh,   # Database handle
+         $sql,   # SQL string
+         $out,   # Output specification, see DBI documentation.
+       ) = @_;
+
+    # Returns a list (or a reference to it in scalar context).
+
+    my $sth = $dbh->prepare( $sql );
+
+    if ( not $sth )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "SQL PREPARE ERROR" );
+    }
+
+    if ( not $sth->execute )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "SQL EXECUTE ERROR" );
+    }
+
+    my $table = $sth->fetchall_arrayref( $out );
+
+    if ( not $table )
+    {
+        my $errstr = $DBI::errstr;
+
+        &disconnect( $dbh );
+        die qq(ERROR: $errstr, "DATABASE RETRIEVE ERROR" );
+    }
+
+    return wantarray ? @{ $table } : $table;
+}
+
+
+sub query_hashref_list
+{
+    # Martin A. Hansen, May 2008.
+
+    # Executes a SQL query and returns the result as a list of
+    # hashrefs, one per row (selectall_arrayref with an empty Slice hash).
+    # If the query fails, the handle is disconnected and the routine
+    # dies with the DBI error string, like the other query routines here.
+
+    my ( $dbh,   # database handle
+         $sql,   # sql query
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my $table = $dbh->selectall_arrayref( $sql, { Slice => {} } );
+
+    # selectall_arrayref yields undef on failure; dereferencing that
+    # would die with an unrelated message, so report the DBI error instead.
+    if ( not defined $table )
+    {
+        my $errstr = $DBI::errstr;
+
+        disconnect( $dbh );
+        die qq(ERROR: $errstr, "DATABASE RETRIEVE ERROR" );
+    }
+
+    return wantarray ? @{ $table } : $table;
+}
+
+
+sub delete_table
+{
+    # Drops the named table by issuing "drop table" through request().
+    # The table name is inserted into the statement as given.
+
+    my ( $dbh,     # database handle
+         $table,   # name of table to drop
+       ) = @_;
+
+    my $sql = "drop table $table";
+
+    return request( $dbh, $sql );
+}
+
+
+sub list_tables
+{
+    # Lists the tables of the connected database by running
+    # "show tables" and keeping the first column of every row.
+
+    my ( $dbh,   # database handle
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my @names = map { $_->[ 0 ] } query_array( $dbh, "show tables" );
+
+    return wantarray ? @names : \@names;
+}
+
+
+sub table_exists
+{
+    # Checks whether a table with exactly the given name is among
+    # the tables reported by list_tables().
+
+    my ( $dbh,    # database handle
+         $name,   # table name to look for
+       ) = @_;
+
+    # Returns 1 if the table exists, otherwise nothing.
+
+    # Compare with eq rather than a regex: the name is data, and regex
+    # metacharacters in it (e.g. ".") must not act as wildcards.
+    foreach my $table ( list_tables( $dbh ) ) {
+        return 1 if $table eq $name;
+    }
+
+    return;
+}
+
+
+sub connect
+{
+    # Martin A. Hansen, May 2008.
+
+    # Given a database, user and password, obtains a MySQL database
+    # handle through DBI. database_exists() is consulted first and
+    # Maasha::Common::error() is called if the database is missing or
+    # if DBI cannot connect.
+
+    my ( $database,   # MySQL database
+         $user,       # MySQL user
+         $pass,       # MySQL password
+       ) = @_;
+
+    # Returns object.
+
+    if ( not database_exists( $database, $user, $pass ) ) {
+        Maasha::Common::error( qq(Database "$database" does not exist) );
+    }
+
+    # Errors are neither raised nor printed by DBI itself; the routines
+    # in this file inspect return values and $DBI::errstr instead.
+    my %attr = (
+        RaiseError         => 0,
+        PrintError         => 0,
+        AutoCommit         => 0,
+        ShowErrorStatement => 1,
+    );
+
+    my $dbh = DBI->connect( "dbi:mysql:$database", $user, $pass, \%attr );
+
+    return $dbh if $dbh;
+
+    Maasha::Common::error( qq($DBI::errstr) );
+}
+
+
+sub disconnect
+{
+    # Disconnects the given database handle and dies with the DBI
+    # error string if the disconnect fails.
+
+    my ( $dbh,   # database handle
+       ) = @_;
+
+    my $ok = $dbh->disconnect;
+
+    if ( not $ok )
+    {
+        die qq(ERROR: $DBI::errstr );
+    }
+}
+
+
+sub update_field
+{
+    # Martin A. Hansen, April 2003.
+
+    # Updates the content of a single table cell: the row whose
+    # column holds the old value gets the new value in that column.
+    # Nothing is updated when more than one row matches (a warning is
+    # issued), and the routine disconnects and dies when no row matches.
+
+    my ( $dbh,       # database handle
+         $table,     # table name
+         $column,    # column where updating
+         $old_val,   # the old cell content
+         $new_val,   # the new cell content
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $old_sql, $new_sql, $count_sql, @rows, $count, $sql );
+
+    # Let the driver quote the values so that embedded quote characters
+    # cannot break (or alter) the statement.
+    $old_sql = $dbh->quote( $old_val );
+    $new_sql = $dbh->quote( $new_val );
+
+    $count_sql = qq( SELECT $column FROM $table WHERE $column=$old_sql; );
+
+    # query_array() returns a reference in scalar context, so the rows are
+    # fetched in list context and counted explicitly.
+    @rows  = query_array( $dbh, $count_sql );
+    $count = scalar @rows;
+
+    if ( $count > 1 )
+    {
+        warn qq(WARNING: More than one entry found "$count_sql"\n);
+    }
+    elsif ( $count == 0 )
+    {
+        disconnect( $dbh );
+        die qq(ERROR: entry not found "$count_sql"\n);
+    }
+    else
+    {
+        $sql = qq( UPDATE $table SET $column=$new_sql WHERE $column=$old_sql; );
+        request( $dbh, $sql );
+    }
+
+    return;
+}
+
+
+sub delete_row
+{
+    # Martin A. Hansen, April 2003.
+
+    # Deletes the records of a table whose given field equals the
+    # given value.
+
+    my ( $dbh,       # database handle
+         $table,     # table name
+         $field,     # field e.g. rec no
+         $pattern,   # value the field must equal
+       ) = @_;
+
+    # Returns nothing.
+
+    # The value is quoted by the driver (as add_row does) so that quote
+    # characters inside it cannot break the statement.
+    my $sql = "DELETE FROM $table WHERE $field = " . $dbh->quote( $pattern ) . ";";
+
+    request( $dbh, $sql );
+
+    return;
+}
+
+
+sub add_row
+{
+    # Martin A. Hansen, April 2003.
+
+    # Adds a record to a table. Fields equal to "NULL" or to the empty
+    # string are inserted as SQL NULL; all other fields are quoted by
+    # the database driver.
+
+    my ( $dbh,      # database handle
+         $table,    # table name
+         $fields,   # row to be inserted (list reference)
+       ) = @_;
+
+    # Returns nothing.
+
+    my @values = map { ( $_ eq "NULL" or $_ eq '' ) ? "NULL" : $dbh->quote( $_ ) } @{ $fields };
+
+    my $sql = "INSERT INTO $table VALUES ( " . join( ", ", @values ) . " );";
+
+    request( $dbh, $sql );
+
+    return;
+}
+
+
+sub add_column
+{
+    # Martin A. Hansen, April 2003.
+
+    # Inserts a column in a table, optionally together with an index
+    # named "<column>_index" on that column.
+
+    my ( $dbh,      # database handle
+         $table,    # table name
+         $column,   # name of column
+         $type,     # variable type
+         $index,    # enable index
+       ) = @_;
+
+    # Returns nothing.
+
+    my $definition = "$column $type";
+
+    $definition .= ", INDEX $column" . "_index ( $column )" if $index;
+
+    request( $dbh, "ALTER TABLE $table ADD COLUMN ( $definition );" );
+
+    return;
+}
+
+
+sub del_column
+{
+    # Martin A. Hansen, April 2003.
+
+    # Deletes a column from a table.
+
+    my ( $dbh,      # database handle
+         $table,    # table name
+         $column,   # column to be deleted
+       ) = @_;
+
+    # Returns nothing.
+
+    request( $dbh, "ALTER TABLE $table DROP COLUMN $column;" );
+
+    return;
+}
+
+
+sub load_sql_file
+{
+    # Martin A. Hansen, January 2004.
+
+    # Loads a delimiter separated file into a SQL table using
+    # LOAD DATA LOCAL INFILE.
+
+    my ( $dbh,         # database handle object
+         $path,        # filename with path
+         $table,       # table to load data into
+         $delimiter,   # column delimiter - OPTIONAL, defaults to tab
+       ) = @_;
+
+    # Returns whatever request() returns.
+
+    my $sql;
+
+    $delimiter ||= "\t";
+
+    $sql = qq( LOAD DATA LOCAL INFILE "$path" INTO TABLE $table FIELDS TERMINATED BY '$delimiter' );
+
+    # Call the request() routine of this file, as every sibling routine
+    # does, instead of a routine in a separate SQL:: package.
+    request( $dbh, $sql );
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::Seq;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for inspecting, converting, translating, folding and colouring
+# nucleotide and protein sequences.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use IPC::Open2;
+use List::Util qw( shuffle );
+use Time::HiRes qw( gettimeofday );
+
+use vars qw ( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub seq_guess_type
+{
+    # Martin A. Hansen, May 2007.
+
+    # Makes a qualified guess on the type of a given sequence by
+    # inspecting at most its first 100 residues: any of the letters
+    # FLPQIE (either case) means protein, otherwise any U means RNA,
+    # otherwise DNA.
+
+    my ( $seq,   # sequence to check
+       ) = @_;
+
+    # Returns string: "protein", "rna" or "dna".
+
+    my $sample = substr $seq, 0, 100;
+
+    # tr with an empty replacement list only counts the listed characters.
+    return "protein" if $sample =~ tr/FLPQIEflpqie//;
+    return "rna"     if $sample =~ tr/Uu//;
+
+    return "dna";
+}
+
+
+sub wrap
+{
+    # Martin A. Hansen, July 2007.
+
+    # Wraps the string behind a given reference, in place, by inserting
+    # a newline after every run of $wrap characters. A newline left at
+    # the very end of the string is removed again.
+
+    my ( $strref,   # ref to string to wrap
+         $wrap,     # wrap width
+       ) = @_;
+
+    $$strref =~ s{(.{$wrap})}{$1\n}g;
+
+    chomp $$strref;
+}
+
+
+sub dna_revcomp
+{
+    # Niels Larsen
+    # modified Martin A. Hansen, March 2005.
+
+    # Returns the reverse complement of a dna sequence with preservation
+    # of case according to this mapping,
+    #
+    # AGCUTRYWSMKHDVBNagcutrywsmkhdvbn
+    # TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn
+
+    my ( $seq,   # seq
+       ) = @_;
+
+    # returns string
+
+    my $revcomp = scalar reverse $seq;
+
+    $revcomp =~ tr/AGCUTRYWSMKHDVBNagcutrywsmkhdvbn/TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn/;
+
+    return $revcomp;
+}
+
+
+sub rna_revcomp
+{
+    # Niels Larsen
+    # modified Martin A. Hansen, March 2005.
+
+    # Returns the reverse complement of a rna sequence with preservation
+    # of case according to this mapping,
+    #
+    # AGCUTRYWSMKHDVBNagcutrywsmkhdvbn
+    # UCGAAYRWSKMDHBVNucgaayrwskmdhbvn
+
+    my ( $seq,   # seq
+       ) = @_;
+
+    # returns string
+
+    my $revcomp = scalar reverse $seq;
+
+    $revcomp =~ tr/AGCUTRYWSMKHDVBNagcutrywsmkhdvbn/UCGAAYRWSKMDHBVNucgaayrwskmdhbvn/;
+
+    return $revcomp;
+}
+
+
+sub dna_comp
+{
+    # Niels Larsen
+    # modified Martin A. Hansen, March 2005.
+
+    # Complements (without reversing) the dna sequence behind the given
+    # reference, in place, with preservation of case according to this
+    # mapping,
+    #
+    # AGCUTRYWSMKHDVBNagcutrywsmkhdvbn
+    # TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn
+
+    my ( $seqref,   # seqref
+       ) = @_;
+
+    # The sequence is modified through the reference.
+
+    $$seqref =~ tr/AGCUTRYWSMKHDVBNagcutrywsmkhdvbn/TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn/;
+}
+
+
+sub rna_comp
+{
+    # Niels Larsen
+    # modified Martin A. Hansen, March 2005.
+
+    # Complements (without reversing) the rna sequence behind the given
+    # reference, in place, with preservation of case according to this
+    # mapping,
+    #
+    # AGCUTRYWSMKHDVBNagcutrywsmkhdvbn
+    # UCGAAYRWSKMDHBVNucgaayrwskmdhbvn
+
+    my ( $seqref,   # seqref
+       ) = @_;
+
+    # The sequence is modified through the reference.
+
+    $$seqref =~ tr/AGCUTRYWSMKHDVBNagcutrywsmkhdvbn/UCGAAYRWSKMDHBVNucgaayrwskmdhbvn/;
+}
+
+
+sub dna2rna
+{
+    # Martin A. Hansen, March 2007
+
+    # Converts a DNA sequence to RNA by replacing T with U,
+    # preserving case.
+
+    my ( $seq,   # nucleotide sequence
+       ) = @_;
+
+    # returns string
+
+    ( my $rna = $seq ) =~ tr/Tt/Uu/;
+
+    return $rna;
+}
+
+
+sub rna2dna
+{
+    # Martin A. Hansen, March 2007
+
+    # Converts a RNA sequence to DNA by replacing U with T,
+    # preserving case.
+
+    my ( $seq,   # nucleotide sequence
+       ) = @_;
+
+    # returns string
+
+    ( my $dna = $seq ) =~ tr/Uu/Tt/;
+
+    return $dna;
+}
+
+
+sub nuc2ambiguity
+{
+    # Martin A. Hansen, March 2005.
+
+    # Given a string of nucleotides (which may itself contain ambiguity
+    # codes), returns the single ambiguity code covering all of them.
+    # A warning is issued, and undef returned, if no code covers the set.
+
+    my ( $str,    # string of nucleotides
+         $type,   # DNA or RNA - DEFAULT DNA
+       ) = @_;
+
+    # returns string
+
+    my ( %expand, %seen, %code_for, $key, $code );
+
+    # Ambiguity codes and the bases they stand for (DNA flavour).
+    %expand = (
+        N => 'ACGT', B => 'CGT', D => 'AGT', H => 'ACT', V => 'ACG',
+        K => 'GT',   Y => 'CT',  S => 'CG',  W => 'AT',  R => 'AG',  M => 'AC',
+    );
+
+    # For RNA the expansions use U instead of T; a literal T in the
+    # input itself is left untouched.
+    if ( $type and $type !~ /dna/i ) {
+        tr/T/U/ foreach values %expand;
+    }
+
+    $str = uc $str;
+    $str =~ s/([NBDHVKYSWRM])/$expand{$1}/g;
+
+    # The sorted set of distinct bases is the lookup key.
+    %seen = map { $_ => 1 } split //, $str;
+    $key  = join "", sort keys %seen;
+
+    %code_for = (
+        'A'    => 'A', 'C'    => 'C', 'G'   => 'G', 'T'   => 'T', 'U'   => 'U',
+        'AC'   => 'M', 'AG'   => 'R', 'AT'  => 'W', 'AU'  => 'W', 'CG'  => 'S',
+        'CT'   => 'Y', 'CU'   => 'Y', 'GT'  => 'K', 'GU'  => 'K',
+        'ACG'  => 'V', 'ACT'  => 'H', 'ACU' => 'H', 'AGT' => 'D', 'AGU' => 'D',
+        'CGT'  => 'B', 'CGU'  => 'B',
+        'ACGT' => 'N', 'ACGU' => 'N',
+    );
+
+    $code = $code_for{ $key };
+
+    warn qq(WARNING: No ambiguity code for key->$key\n) if not $code;
+
+    return $code;
+}
+
+
+sub aa2codons
+{
+    # Martin A. Hansen, March 2005.
+
+    # Given an amino acid (one letter code, either case), returns the
+    # list of corresponding DNA codons. Both '*' and 'X' yield the stop
+    # codons. An unknown residue yields an empty list (undef in scalar
+    # context) instead of dying on an undefined array reference.
+
+    my ( $aa,   # amino acid to translate
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my ( %hash, $codons );
+
+    $aa = uc $aa;
+
+    %hash = (
+        'F' => [ 'TTT', 'TTC' ],                               # Phe
+        'L' => [ 'TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG' ],   # Leu
+        'S' => [ 'TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC' ],   # Ser
+        'Y' => [ 'TAT', 'TAC' ],                               # Tyr
+        '*' => [ 'TAA', 'TAG', 'TGA' ],                        # Stop
+        'X' => [ 'TAA', 'TAG', 'TGA' ],                        # Stop
+        'C' => [ 'TGT', 'TGC' ],                               # Cys
+        'W' => [ 'TGG' ],                                      # Trp
+        'P' => [ 'CCT', 'CCC', 'CCA', 'CCG' ],                 # Pro
+        'H' => [ 'CAT', 'CAC' ],                               # His
+        'Q' => [ 'CAA', 'CAG' ],                               # Gln
+        'R' => [ 'CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG' ],   # Arg
+        'I' => [ 'ATT', 'ATC', 'ATA' ],                        # Ile
+        'M' => [ 'ATG' ],                                      # Met
+        'T' => [ 'ACT', 'ACC', 'ACA', 'ACG' ],                 # Thr
+        'N' => [ 'AAT', 'AAC' ],                               # Asn
+        'K' => [ 'AAA', 'AAG' ],                               # Lys
+        'V' => [ 'GTT', 'GTC', 'GTA', 'GTG' ],                 # Val
+        'A' => [ 'GCT', 'GCC', 'GCA', 'GCG' ],                 # Ala
+        'D' => [ 'GAT', 'GAC' ],                               # Asp
+        'E' => [ 'GAA', 'GAG' ],                               # Glu
+        'G' => [ 'GGT', 'GGC', 'GGA', 'GGG' ],                 # Gly
+    );
+
+    $codons = $hash{ $aa };
+
+    # Bare return: empty list in list context, undef in scalar context.
+    return if not defined $codons;
+
+    return wantarray ? @{ $codons } : $codons;
+}
+
+
+sub codon2aa
+{
+    # Martin A. Hansen, March 2005.
+
+    # Given a DNA codon (either case), returns the corresponding amino
+    # acid (one letter code, '*' for stop) according to the standard
+    # genetic code. Dies if the argument is not exactly three of the
+    # letters A, T, C and G.
+
+    my ( $codon,   # codon to translate
+       ) = @_;
+
+    # returns string
+
+    my ( @bases, $residues, %aa_for, $i );
+
+    # The pattern is anchored so that longer strings that merely contain
+    # three valid letters are rejected rather than translated to undef.
+    die qq(ERROR: Bad codon: "$codon"\n) if $codon !~ /\A[ATCGatcg]{3}\z/;
+
+    # Standard genetic code with codons enumerated in TCAG order
+    # (TTT, TTC, TTA, TTG, TCT, ... GGG), one residue per codon.
+    @bases    = qw( T C A G );
+    $residues = "FFLLSSSSYY**CC*W"    # T..
+              . "LLLLPPPPHHQQRRRR"    # C..
+              . "IIIMTTTTNNKKSSRR"    # A..
+              . "VVVVAAAADDEEGGGG";   # G..
+
+    $i = 0;
+
+    foreach my $first ( @bases )
+    {
+        foreach my $second ( @bases )
+        {
+            foreach my $third ( @bases ) {
+                $aa_for{ $first . $second . $third } = substr $residues, $i++, 1;
+            }
+        }
+    }
+
+    return $aa_for{ uc $codon };
+}
+
+
+sub translate
+{
+    # Martin A. Hansen, June 2005.
+
+    # Translates a dna sequence to protein according to an optionally
+    # given reading frame (1, 2, 3, -1, -2 or -3; default 1). Negative
+    # frames are read on the reverse complement. Trailing bases that do
+    # not fill a codon are ignored.
+
+    my ( $dna,     # dna sequence
+         $frame,   # frame of translation - OPTIONAL
+       ) = @_;
+
+    # Returns string (undef if the sequence holds no complete codon).
+
+    my ( $offset, $pos, $pep );
+
+    $frame ||= 1;
+    $offset  = 0;
+
+    # The pattern is anchored so that e.g. 12 or -31 is rejected instead
+    # of being accepted because it contains a digit between 1 and 3.
+    if ( $frame =~ /\A-?[1-3]\z/ )
+    {
+        $dna = &Maasha::Seq::dna_revcomp( $dna ) if $frame < 0;
+
+        $offset = abs( $frame ) - 1;
+    }
+    else
+    {
+        &Maasha::Common::error( qq(Badly formated frame "$frame") );
+    }
+
+    for ( $pos = $offset; $pos + 3 <= length $dna; $pos += 3 ) {
+        $pep .= codon2aa( substr $dna, $pos, 3 );
+    }
+
+    return $pep;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RNA FOLDING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub fold_struct_rnafold
+{
+    # Martin A. Hansen, February 2008.
+
+    # Given a sequence, folds this using RNAfold. The sequence is written
+    # as a FASTA entry to the stdin of "RNAfold -noPS", and the last line
+    # of its output is parsed as "<structure> (<energy>)".
+
+    my ( $seq,   # sequence to fold
+       ) = @_;
+
+    # Returns a tuple of fold string and free energy
+    # (both undef if the last output line does not parse).
+
+    my ( $fh_out, $fh_in, $struct, $energy );
+
+    my $pid = open2( $fh_out, $fh_in, "RNAfold -noPS" );
+
+    Maasha::Fasta::put_entry( [ "RNAfold", $seq ], $fh_in );
+
+    close $fh_in;
+
+    my @lines = <$fh_out>;
+
+    close $fh_out;
+
+    waitpid $pid, 0;
+
+    chomp @lines;
+
+    ( $struct, $energy ) = ( $1, $2 ) if $lines[ -1 ] =~ /^([^ ]+) \((.+)\)$/;
+
+    return wantarray ? ( $struct, $energy ) : [ $struct, $energy ];
+}
+
+
+sub fold_struct_contrastruct
+{
+    # Martin A. Hansen, February 2008.
+
+    # Given a sequence, folds this using Contrafold. The sequence is
+    # written to a FASTA file in the temporary directory, contrafold is
+    # run to produce a parenthesis file and a bpseq file, and both are
+    # read back and removed. The file names are fixed (fold.fna,
+    # fold.out1, fold.out2).
+
+    my ( $seq,       # sequence to fold
+         $tmp_dir,   # temporary directory - OPTIONAL, defaults to $ENV{ 'TMP_DIR' }
+       ) = @_;
+
+    # Returns a tuple of fold string and temp index, the latter being the
+    # summed base pair melting temperatures divided by the sequence length.
+
+    my ( $fh, $struct, @bpseq, $i, $temp, $index );
+
+    $tmp_dir ||= $ENV{ 'TMP_DIR' };
+
+    my $fasta_file  = "$tmp_dir/fold.fna";
+    my $parens_file = "$tmp_dir/fold.out1";
+    my $bpseq_file  = "$tmp_dir/fold.out2";
+
+    Maasha::Fasta::put_entries( [ [ "fold", $seq ] ], $fasta_file );
+
+    Maasha::Common::run( "contrafold", "predict --parens $parens_file --bpseq $bpseq_file $fasta_file" );
+
+    unlink $fasta_file;
+
+    # The structure string is the last line of the parenthesis file.
+    $fh = Maasha::Common::read_open( $parens_file );
+
+    while ( defined( my $row = <$fh> ) )
+    {
+        chomp $row;
+
+        $struct = $row;
+    }
+
+    close $fh;
+
+    unlink $parens_file;
+
+    # Each bpseq line is split on whitespace into its fields; fields
+    # 0, 1 and 2 are used below as position, base and partner position.
+    $fh = Maasha::Common::read_open( $bpseq_file );
+
+    while ( defined( my $row = <$fh> ) )
+    {
+        chomp $row;
+
+        push @bpseq, [ split " ", $row ];
+    }
+
+    close $fh;
+
+    unlink $bpseq_file;
+
+    # Sum the melting temperature of paired bases (partner != 0), stopping
+    # at the first base whose partner lies before it.
+    $i = 0;
+
+    while ( $i < @bpseq )
+    {
+        if ( $bpseq[ $i ]->[ 2 ] != 0 )
+        {
+            last if $bpseq[ $i ]->[ 0 ] > $bpseq[ $i ]->[ 2 ];
+
+            $temp += base_pair_melting_temp( $bpseq[ $i ]->[ 1 ] . $bpseq[ $bpseq[ $i ]->[ 2 ] - 1 ]->[ 1 ] );
+        }
+
+        $i++;
+    }
+
+    $index = sprintf( "%.2f", $temp / length $seq );
+
+    return wantarray ? ( $struct, $index ) : [ $struct, $index ];
+}
+
+
+sub base_pair_melting_temp
+{
+    # Martin A. Hansen, February 2008.
+
+    # Given a basepair (two letters from A, T, C, G, U in either case),
+    # returns its melting temperature contribution: 4 for C-G pairs,
+    # 2 for A-T and A-U pairs, 1 for the G-T and G-U pairs, and
+    # 0 for every other combination of these letters.
+
+    my ( $bp,   # basepair string
+       ) = @_;
+
+    # Returns integer (undef for strings outside the table).
+
+    my ( @bases, %melt_hash );
+
+    @bases = qw( A T C G U );
+
+    # Every combination of the five bases is known and defaults to 0 ...
+    foreach my $first ( @bases )
+    {
+        foreach my $second ( @bases ) {
+            $melt_hash{ $first . $second } = 0;
+        }
+    }
+
+    # ... and the pairs that do contribute are then filled in.
+    @melt_hash{ qw( AT TA AU UA ) } = ( 2 ) x 4;
+    @melt_hash{ qw( TG GT GU UG ) } = ( 1 ) x 4;
+    @melt_hash{ qw( CG GC ) }       = ( 4 ) x 2;
+
+    return $melt_hash{ uc $bp };
+}
+
+
+sub generate_dna_oligos
+{
+    # Martin A. Hansen, April 2007.
+
+    # Generates all possible oligos of a given wordsize over the
+    # alphabet A, T, C, G and N. The oligos are built by extending every
+    # oligo of the previous round with each letter in turn.
+
+    # alternative way: perl -MData::Dumper -e '@CONV = glob( "{T,C,A,G}" x 4 ); print Dumper( \@CONV )'
+
+    my ( $wordsize,   # size of DNA oligos
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my @alph   = qw( A T C G N );
+    my @oligos = ( '' );
+
+    foreach my $round ( 1 .. $wordsize )
+    {
+        @oligos = map { my $stem = $_; map { $stem . $_ } @alph } @oligos;
+    }
+
+    return wantarray ? @oligos : \@oligos;
+}
+
+
+sub seq2oligos
+{
+    # Martin A. Hansen, April 2007
+
+    # Given a sequence reference and a wordsize, breaks the sequence
+    # into overlapping oligos of that wordsize, one starting at every
+    # position that leaves room for a full word.
+
+    my ( $seq,        # sequence reference
+         $wordsize,   # wordsize
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my @oligos;
+
+    my $limit = length( ${ $seq } ) - $wordsize + 1;
+    my $pos   = 0;
+
+    while ( $pos < $limit )
+    {
+        push @oligos, substr( ${ $seq }, $pos, $wordsize );
+
+        $pos++;
+    }
+
+    return wantarray ? @oligos : \@oligos;
+}
+
+
+sub seq2oligos_uniq
+{
+    # Martin A. Hansen, April 2007
+
+    # Given a sequence reference and a wordsize, breaks the sequence
+    # into overlapping oligos of that wordsize and returns each distinct
+    # word once, in order of first occurrence.
+
+    my ( $seq,        # sequence reference
+         $wordsize,   # wordsize
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my ( %seen, @oligos );
+
+    my $limit = length( ${ $seq } ) - $wordsize + 1;
+    my $pos   = 0;
+
+    while ( $pos < $limit )
+    {
+        my $word = substr ${ $seq }, $pos, $wordsize;
+
+        push @oligos, $word if not $seen{ $word }++;
+
+        $pos++;
+    }
+
+    return wantarray ? @oligos : \@oligos;
+}
+
+
+sub oligo_freq
+{
+    # Martin A. Hansen, August 2007.
+
+    # Given a hashref with oligo=>count, calculates a frequency table:
+    # a list of hashes with the keys OLIGO, COUNT and FREQ, where FREQ
+    # is the count over the total count with four decimals. The list is
+    # sorted by descending count, then alphabetically by oligo.
+
+    my ( $oligo_freq,   # hashref
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my ( @freq_table, $total );
+
+    $total = 0;
+
+    foreach my $oligo ( keys %{ $oligo_freq } )
+    {
+        push @freq_table, { OLIGO => $oligo, COUNT => $oligo_freq->{ $oligo } };
+
+        $total += $oligo_freq->{ $oligo };
+    }
+
+    @freq_table = sort { $b->{ "COUNT" } <=> $a->{ "COUNT" } or $a->{ "OLIGO" } cmp $b->{ "OLIGO" } } @freq_table;
+
+    foreach my $row ( @freq_table ) {
+        $row->{ "FREQ" } = sprintf( "%.4f", $row->{ "COUNT" } / $total );
+    }
+
+    return wantarray ? @freq_table : \@freq_table;
+}
+
+
+sub seq_generate
+{
+    # Martin A. Hansen, May 2007
+
+    # Generates a random sequence of a given length by drawing each
+    # residue with rand() from a given alphabet.
+
+    my ( $len,    # sequence length
+         $alph,   # sequence alphabet (list reference)
+       ) = @_;
+
+    # Returns string (undef if the length is 0).
+
+    my $seq;
+
+    my $alph_len = scalar @{ $alph };
+    my $made     = 0;
+
+    while ( $made < $len )
+    {
+        $seq .= $alph->[ int( rand( $alph_len ) ) ];
+
+        $made++;
+    }
+
+    return $seq;
+}
+
+
+sub seq_shuffle
+{
+    # Martin A. Hansen, December 2007.
+
+    # Shuffles the residues of a given sequence string using
+    # List::Util::shuffle.
+
+    my ( $seq,   # sequence string
+       ) = @_;
+
+    # Returns string.
+
+    my @shuffled = shuffle( split //, $seq );
+
+    return join "", @shuffled;
+}
+
+
+sub seq_alph
+{
+    # Martin A. Hansen, May 2007.
+
+    # Returns a requested alphabet: "dna" or "rna" (case insensitive,
+    # exact) or anything starting with "prot" for the 20 amino acids.
+    # Dies on any other type.
+
+    my ( $type,   # alphabet type
+       ) = @_;
+
+    # Returns a list in list context, else a reference to that list.
+
+    my @table = (
+        [ qr/^dna$/i, [ qw( A T C G ) ] ],
+        [ qr/^rna$/i, [ qw( A U C G ) ] ],
+        [ qr/^prot/i, [ qw( F L S Y C W P H Q R I M T N K V A D E G ) ] ],
+    );
+
+    foreach my $entry ( @table )
+    {
+        my ( $pattern, $alph ) = @{ $entry };
+
+        next if $type !~ $pattern;
+
+        return wantarray ? @{ $alph } : $alph;
+    }
+
+    die qq(ERROR: Unknown alphabet type: "$type"\n);
+}
+
+
+sub seq_analyze
+{
+    # Martin A. Hansen, August 2007.
+
+    # Analyses the residue composition of a given sequence. The result
+    # holds the keys
+    #
+    #   SEQ_TYPE     guessed type (DNA, RNA or PROTEIN)
+    #   SEQ_LEN      sequence length
+    #   GC%          G+C percentage                 (nucleotides only)
+    #   SOFT_MASK%   lower case residue percentage  (nucleotides only)
+    #   HARD_MASK%   N percentage                   (nucleotides only)
+    #   RES:<x>      count of residue x, both cases added, and of the
+    #                indel characters - ~ . _
+    #   RES_SUM      sum of all residue counts (indels excluded)
+    #   MIX_INDEX    count of the most common residue over the length
+    #   MELT_TEMP    4 * (G+C) + 2 * (A+T+U)
+    #
+    # All ratios are reported as 0.00 for an empty sequence instead of
+    # dying with a division by zero.
+
+    my ( $seq,   # sequence to analyze
+       ) = @_;
+
+    # Returns a hash in list context, else a hash reference.
+
+    my ( %analysis, %count, @residues, @indels, $len, $is_nuc, $gc, $at, $lc, $max );
+
+    $len = length $seq;
+
+    $analysis{ "SEQ_TYPE" } = uc &Maasha::Seq::seq_guess_type( $seq );
+    $analysis{ "SEQ_LEN" }  = $len;
+
+    $is_nuc = $analysis{ "SEQ_TYPE" } =~ /DNA|RNA/;
+
+    @indels   = qw( - ~ . _ );
+    @residues = split //, $is_nuc ? "AGCUTRYWSMKHDVBN" : "FLSYCWPHQRIMTNKVADEG";
+
+    # One pass over the sequence counts every character; this replaces
+    # one string eval per symbol.
+    $count{ $_ }++ foreach split //, $seq;
+
+    $gc = $at = $lc = $max = 0;
+
+    if ( $is_nuc )
+    {
+        $gc += $count{ $_ } || 0 foreach qw( G C g c );
+        $at += $count{ $_ } || 0 foreach qw( A T U a t u );
+        $lc += $count{ lc $_ } || 0 foreach @residues;
+
+        $analysis{ "GC%" }        = sprintf( "%.2f", $len ? 100 * $gc / $len : 0 );
+        $analysis{ "SOFT_MASK%" } = sprintf( "%.2f", $len ? 100 * $lc / $len : 0 );
+        $analysis{ "HARD_MASK%" } = sprintf( "%.2f", $len ? 100 * ( ( $count{ "n" } || 0 ) + ( $count{ "N" } || 0 ) ) / $len : 0 );
+    }
+
+    foreach my $residue ( @residues )
+    {
+        my $sum = ( $count{ $residue } || 0 ) + ( $count{ lc $residue } || 0 );
+
+        $analysis{ "RES:$residue" } = $sum;
+        $analysis{ "RES_SUM" }     += $sum;
+
+        $max = $sum if $sum > $max;
+    }
+
+    $analysis{ "RES:$_" } = $count{ $_ } || 0 foreach @indels;
+
+    $analysis{ "MIX_INDEX" } = sprintf( "%.2f", $len ? $max / $len : 0 );
+    $analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at );
+
+    return wantarray ? %analysis : \%analysis;
+}
+
+
+sub seq_complexity
+{
+    # Martin A. Hansen, May 2008.
+
+    # Given a sequence computes a complexity index as the count of the
+    # most common di-residue over the sequence length (case is ignored).
+    # Returns ~1 if the entire sequence is homopolymeric. Above 0.4
+    # indicates low complexity sequence. An empty sequence gives 0.
+
+    my ( $seq,   # sequence
+       ) = @_;
+
+    # Returns float.
+
+    my ( $len, $i, $max, $di, %hash );
+
+    $seq = uc $seq;
+    $len = length $seq;
+    $max = 0;
+
+    # Guard against division by zero below.
+    return 0 if not $len;
+
+    for ( $i = 0; $i < $len - 1; $i++ ) {
+        $hash{ substr $seq, $i, 2 }++;
+    }
+
+    foreach $di ( keys %hash ) {
+        $max = $hash{ $di } if $hash{ $di } > $max;
+    }
+
+    return $max / $len;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SEQLOGO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub seqlogo_calc
+{
+    # Martin A. Hansen, January 2007.
+
+    # Given max bit size and a list of aligned entries (tuples whose
+    # second element is the sequence), calculates for each sequence
+    # position the height of the letters in bits. Letters are counted
+    # case insensitively and the indel characters - _ ~ . are left out,
+    # while the total used for frequencies is the number of entries.
+    # The number of positions is taken from the first entry.
+
+    my ( $bit_max,   # maximum bit height
+         $entries,   # FASTA entries
+       ) = @_;
+
+    # Returns a list with one element per position, each holding
+    # [ letter, height ] tuples (list reference in scalar context).
+
+    my @logo;
+
+    my $logo_len = length $entries->[ 0 ]->[ 1 ];
+    my $char_tot = scalar @{ $entries };
+    my $col      = 0;
+
+    while ( $col < $logo_len )
+    {
+        my %char_hash;
+
+        foreach my $entry ( @{ $entries } ) {
+            $char_hash{ uc substr( $entry->[ 1 ], $col, 1 ) }++;
+        }
+
+        delete @char_hash{ "-", "_", "~", "." };
+
+        my $bit_height = seqlogo_calc_bit_height( \%char_hash, $char_tot );
+
+        my $char_heights = seqlogo_calc_char_heights( \%char_hash, $char_tot, $bit_max - $bit_height );
+
+        push @logo, $char_heights;
+
+        $col++;
+    }
+
+    return wantarray ? @logo : \@logo;
+}
+
+
+sub seqlogo_calc_bit_height
+{
+    # Martin A. Hansen, January 2007.
+
+    # Calculates the bit height using Shannon's general formula for
+    # uncertainty, -sum( f * log2( f ) ) over the frequencies f of the
+    # given characters, as documented:
+    # http://www.ccrnp.ncifcrf.gov/~toms/paper/hawaii/latex/node5.html
+
+    my ( $char_hash,   # hashref with chars and counts
+         $tot,         # total number of chars
+       ) = @_;
+
+    # returns float
+
+    my $sum;
+
+    foreach my $count ( values %{ $char_hash } )
+    {
+        my $freq = $count / $tot;
+
+        $sum += $freq * ( log( $freq ) / log( 2 ) );
+    }
+
+    $sum *= -1;
+
+    return $sum;
+}
+
+
+sub seqlogo_calc_char_heights
+{
+    # Martin A. Hansen, January 2007.
+
+    # Calculates the height of each char in bits, as its frequency times
+    # the given bit difference, and sorts the chars by ascending height.
+
+    my ( $char_hash,   # hashref with chars and counts
+         $tot,         # tot number of chars
+         $bit_diff,    # information gained from uncertainties
+       ) = @_;
+
+    # Returns a list of [ char, height ] tuples
+    # (list reference in scalar context).
+
+    my @char_heights =
+        sort { $a->[ 1 ] <=> $b->[ 1 ] }
+        map  { [ $_, $char_hash->{ $_ } / $tot * $bit_diff ] }
+        keys %{ $char_hash };
+
+    return wantarray ? @char_heights : \@char_heights;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> RESIDUE COLORS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub color_pep
+{
+    # Martin A. Hansen, October 2005.
+
+    # Color scheme for proteins as defined in Mview.
+    # Given a char (either case) returns the name of the appropriate
+    # color, or "black" for chars outside the scheme.
+    # The amino acids are colored according to physicochemical properties:
+    # bright green = hydrophobic; dark green = large hydrophobic;
+    # bright blue = negative charge; red = positive charge;
+    # dull blue = small alcohol; purple = polar; yellow = cysteine.
+
+    my ( $char,   # char to decorate
+       ) = @_;
+
+    # returns string
+
+    my ( %chars_in, %color_of, $key );
+
+    %chars_in = (
+        "bright-red"   => [ qw( K R ) ],
+        "bright-blue"  => [ qw( D E ) ],
+        "dull-blue"    => [ qw( S T ) ],
+        "purple"       => [ qw( N Q ) ],
+        "bright-green" => [ qw( A V I L M G P ) ],
+        "dark-green"   => [ qw( H F Y W ) ],
+        "yellow"       => [ qw( C ) ],
+        "dark-gray"    => [ qw( Z B * ) ],
+        "light-gray"   => [ qw( ? ~ ) ],
+    );
+
+    foreach my $color ( keys %chars_in ) {
+        $color_of{ $_ } = $color foreach @{ $chars_in{ $color } };
+    }
+
+    $key = uc $char;
+
+    return exists $color_of{ $key } ? $color_of{ $key } : "black";
+}
+
+
+sub color_nuc
+{
+    # Martin A. Hansen, October 2005.
+
+    # Color scheme for nucleotides as defined in Mview.
+    # Given a char (either case) returns the name of the appropriate
+    # color, or "black" for chars other than A, G, C, T and U.
+
+    my ( $char,   # char to decorate
+       ) = @_;
+
+    # returns string
+
+    my %color_of = (
+        A => "bright-red",
+        G => "yellow",
+        C => "blue",
+        T => "green",
+        U => "green",
+    );
+
+    my $key = uc $char;
+
+    return exists $color_of{ $key } ? $color_of{ $key } : "black";
+}
+
+
+sub color_palette
+{
+    # Martin A. Hansen, October 2005.
+
+    # Hash table with color-names and color-hex. Given a color name
+    # returns its hex code. For an unknown name a warning is printed to
+    # STDERR and nothing is returned.
+
+    my ( $color,   # common color name
+       ) = @_;
+
+    # returns string
+
+    my ( %hash );
+
+    %hash = (
+        "black"              => "#000000",
+        "white"              => "#ffffff",
+        "red"                => "#ff0000",
+        "green"              => "#00ff00",
+        "blue"               => "#0000ff",
+        "cyan"               => "#00ffff",
+        "magenta"            => "#ff00ff",
+#       "yellow"             => "#ffff00",
+        "yellow"             => "#ffc800",
+        "purple"             => "#6600cc",
+        "dull-blue"          => "#0099ff",
+        "dark-green-blue"    => "#33cccc",
+        "medium-green-blue"  => "#00ffcc",
+        "bright-blue"        => "#0033ff",
+        "dark-green"         => "#009900",
+        "bright-green"       => "#33cc00",
+        "orange"             => "#ff3333",
+        "orange-brown"       => "#cc6600",
+        "bright-red"         => "#cc0000",
+        "light-gray"         => "#999999",
+        "dark-gray"          => "#666666",
+        "gray0"              => "#ffffff",
+        "gray1"              => "#eeeeee",
+        "gray2"              => "#dddddd",
+        "gray3"              => "#cccccc",
+        "gray4"              => "#bbbbbb",
+        "gray5"              => "#aaaaaa",
+        "gray6"              => "#999999",
+        "gray7"              => "#888888",
+        "gray8"              => "#777777",
+        "gray9"              => "#666666",
+        "gray10"             => "#555555",
+        "gray11"             => "#444444",
+        "gray12"             => "#333333",
+        "gray13"             => "#222222",
+        "gray14"             => "#111111",
+        "gray15"             => "#000000",
+        "clustal-red"        => "#ff1111",
+        "clustal-blue"       => "#1155ff",
+        "clustal-green"      => "#11dd11",
+        "clustal-cyan"       => "#11ffff",
+        "clustal-yellow"     => "#ffff11",
+        "clustal-orange"     => "#ff7f11",
+        "clustal-pink"       => "#ff11ff",
+        "clustal-purple"     => "#6611cc",
+        "clustal-dull-blue"  => "#197fe5",
+        "clustal-dark-gray"  => "#666666",
+        "clustal-light-gray" => "#999999",
+        "lin-A"              => "#90fe23",
+        "lin-R"              => "#fe5e2d",
+        "lin-N"              => "#2e3d2d",
+        "lin-D"              => "#00903b",
+        "lin-C"              => "#004baa",
+        "lin-Q"              => "#864b00",
+        "lin-E"              => "#3fa201",
+        "lin-G"              => "#10fe68",
+        "lin-H"              => "#b2063b",
+        "lin-I"              => "#04ced9",
+        "lin-L"              => "#4972fe",
+        "lin-K"              => "#c4a100",
+        "lin-M"              => "#2a84dd",
+        "lin-F"              => "#a60ade",
+        "lin-P"              => "#fe61fe",
+        "lin-S"              => "#f7e847",
+        "lin-T"              => "#fefeb3",
+        "lin-W"              => "#4a007f",
+        "lin-Y"              => "#e903a8",
+        "lin-V"              => "#5bfdfd",
+    );
+
+    return $hash{ $color } if exists $hash{ $color };
+
+    print STDERR qq(WARNING: color "$color" not found in palette!\n);
+
+    # Explicit empty return so that the true value of print is not
+    # handed to the caller as if it were a color.
+    return;
+}
+
+
+sub color_contrast
+{
+    # Martin A. Hansen, October 2005.
+
+    # Hash table with contrast colors to be used for foreground text on
+    # a given background color. Several backgrounds (orange,
+    # orange-brown, the gray ramp and the lin-* colors) have no contrast
+    # color assigned and yield the empty string. For an unknown name a
+    # warning is printed to STDERR and nothing is returned.
+
+    my ( $color,   # background color
+       ) = @_;
+
+    # returns string
+
+    my ( %hash );
+
+    %hash = (
+        # Backgrounds that take white text.
+        ( map { $_ => "white" } qw( black red green blue cyan magenta purple dull-blue
+                                    dark-green-blue medium-green-blue bright-blue
+                                    dark-green bright-red dark-gray ) ),
+
+        # Backgrounds that take black text.
+        ( map { $_ => "black" } qw( white yellow bright-green light-gray ) ),
+        ( map { ( "clustal-$_" => "black" ) } qw( red blue green cyan yellow orange pink purple
+                                                  dull-blue dark-gray light-gray ) ),
+
+        # Backgrounds without an assigned contrast color.
+        ( map { $_ => "" } qw( orange orange-brown ) ),
+        ( map { ( "gray$_" => "" ) } 0 .. 15 ),
+        ( map { ( "lin-$_" => "" ) } qw( A R N D C Q E G H I L K M F P S T W Y V ) ),
+    );
+
+    return $hash{ $color } if exists $hash{ $color };
+
+    print STDERR qq(WARNING: color "$color" not found in palette!\n);
+
+    # Explicit empty return so that the true value of print is not
+    # handed to the caller as if it were a color.
+    return;
+}
+
+
+sub seq_word_pack
+{
+ # Martin A. Hansen, April 2008.
+
+ # Packs a sequence word into a binary string, using a three bit code
+ # for each of the characters A, T, C, G, N, -, . and ~ (upper case
+ # only; other characters have no code in the table below).
+ # The counterpart routine is seq_word_unpack.
+
+ my ( $word, # Word to be packed
+ ) = @_;
+
+ # Returns the packed string built with pack.
+
+ my ( %hash, $bin, $word_size, $pad );
+
+ %hash = (
+ 'A' => '000',
+ 'T' => '001',
+ 'C' => '010',
+ 'G' => '100',
+ 'N' => '011',
+ '-' => '101',
+ '.' => '110',
+ '~' => '111',
+ );
+
+ # Each character is packed on its own and appended.
+ # NOTE(review): pack "B3" appears to emit one full byte per call (the
+ # three bits padded with zero bits), so the result is not a dense
+ # 3-bit-per-residue encoding - confirm against perldoc -f pack.
+ map { $bin .= pack "B3", $hash{ $_ } } split //, $word;
+
+ $word_size = length $word;
+
+ # Number of bytes that 3 bits per character would need; a fractional
+ # value (detected by the "." in its string form) triggers the padding.
+ $pad = ( 3 * $word_size ) / 8;
+
+ if ( $pad =~ /\./ )
+ {
+ # NOTE(review): "int $pad + 1" presumably parses as int( $pad + 1 ),
+ # making $pad the number of bits missing up to the next byte boundary
+ # of a dense encoding - confirm.
+ $pad = ( ( int $pad + 1 ) * 8 ) - 3 * $word_size;
+
+ # Append that many zero bits (packed with "B", see note above).
+ $bin .= pack "B$pad", 0 x $pad;
+ }
+
+ return $bin;
+}
+
+
+sub seq_word_unpack
+{
+    # Martin A. Hansen, April 2008.
+
+    # Decodes a packed sequence word back to ASCII: $word_size groups are
+    # read with the "B3" template and each 3-bit code is translated to its
+    # residue.
+
+    my ( $bin,         # Binary sequence word
+         $word_size,   # Size of word
+       ) = @_;
+
+    # Returns string.
+
+    my %residue_for = (
+        '000' => 'A',
+        '001' => 'T',
+        '010' => 'C',
+        '100' => 'G',
+        '011' => 'N',
+        '101' => '-',
+        '110' => '.',
+        '111' => '~',
+    );
+
+    my $word;
+
+    foreach my $code ( unpack "(B3)$word_size", $bin ) {
+        $word .= $residue_for{ $code };
+    }
+
+    return $word;
+}
+
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::Solid;
+
+
+# Copyright (C) 2007-2008 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for manipulating Solid sequence files with di-base encoding.
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Strictures, as used by the other Maasha modules, so that typos in
+# variable names are caught at compile time and failed table lookups in
+# the conversion routines show up as warnings.
+use strict;
+use warnings;
+
+use vars qw( @ISA @EXPORT_OK );
+
+require Exporter;
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> CONSTANTS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Lookup table used by color_space2seq and seq2color_space. It holds two
+# kinds of keys:
+#   base . color digit => following base   (e.g. 'A1' => 'C')
+#   base . base        => color digit      (e.g. 'AC' => 1)
+# Pairs containing N encode as 4 (N second), 5 (N first) or 6 (NN).
+
+my %CONVERT_HASH = (
+    # color space -> sequence
+    'A0' => 'A', 'A1' => 'C', 'A2' => 'G', 'A3' => 'T',
+    'C0' => 'C', 'C1' => 'A', 'C2' => 'T', 'C3' => 'G',
+    'G0' => 'G', 'G1' => 'T', 'G2' => 'A', 'G3' => 'C',
+    'T0' => 'T', 'T1' => 'G', 'T2' => 'C', 'T3' => 'A',
+
+    # sequence -> color space
+    'AA' => 0, 'AC' => 1, 'AG' => 2, 'AT' => 3,
+    'CA' => 1, 'CC' => 0, 'CG' => 3, 'CT' => 2,
+    'GA' => 2, 'GC' => 3, 'GG' => 0, 'GT' => 1,
+    'TA' => 3, 'TC' => 2, 'TG' => 1, 'TT' => 0,
+
+    # sequence -> color space, pairs involving N
+    'AN' => 4, 'CN' => 4, 'GN' => 4, 'TN' => 4,
+    'NA' => 5, 'NC' => 5, 'NG' => 5, 'NT' => 5,
+    'NN' => 6,
+);
+
+
+# from Solid - ABI
+
+sub define_color_code {
+
+    # Builds the table mapping a pair of adjacent bases to its color code
+    # (0-3 for pairs of A/C/G/T, 4-6 for pairs containing N).
+
+    # Returns a hash (as a list).
+
+    my %color = (
+        AA => 0, CC => 0, GG => 0, TT => 0,
+        AC => 1, CA => 1, GT => 1, TG => 1,
+        AG => 2, CT => 2, GA => 2, TC => 2,
+        AT => 3, CG => 3, GC => 3, TA => 3,
+        AN => 4, CN => 4, GN => 4, TN => 4,
+        NA => 5, NC => 5, NG => 5, NT => 5,
+        NN => 6,
+    );
+
+    return(%color);
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub color_space2seq
+{
+    # Martin A. Hansen, April 2008.
+
+    # Decodes a di-base encoded Solid sequence: the first character is
+    # taken as the leading base, and every following color code is
+    # combined with the previously decoded base to look up the next base
+    # in %CONVERT_HASH.
+
+    my ( $seq_cs,   # di-base encode sequence
+       ) = @_;
+
+    # Returns a string.
+
+    my ( $prev_base, @colors ) = split //, $seq_cs;
+
+    my $seq = $prev_base;
+
+    foreach my $color ( @colors )
+    {
+        # A pair missing from the table yields undef, which adds nothing
+        # to the output here and makes the following lookups fail as well.
+        $prev_base = $CONVERT_HASH{ $prev_base . $color };
+
+        $seq .= $prev_base;
+    }
+
+    return $seq;
+}
+
+
+sub seq2color_space
+{
+    # Martin A. Hansen, April 2008.
+
+    # Encodes a sequence as a di-base encoded Solid sequence: the first
+    # base is kept as is, followed by one color code from %CONVERT_HASH
+    # for every pair of adjacent bases.
+
+    my ( $seq,   # sequence
+       ) = @_;
+
+    # Returns a string.
+
+    my $seq_cs = substr $seq, 0, 1;
+
+    # One overlapping 2-base window per position, stopping at the last pair.
+    foreach my $pos ( 0 .. length( $seq ) - 2 ) {
+        $seq_cs .= $CONVERT_HASH{ substr( $seq, $pos, 2 ) };
+    }
+
+    return $seq_cs;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+1;
--- /dev/null
+package Maasha::Stockholm;
+
+# Copyright (C) 2006 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Routines for manipulation of the Stockholm format.
+# http://www.cgb.ki.se/cgb/groups/sonnhammer/Stockholm.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use Data::Dumper;
+use Maasha::Common;
+use vars qw ( @ISA @EXPORT );
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub get_stockholm_entry
+{
+    # Martin A. Hansen, February 2007.
+
+    # Given a file handle, reads lines up to and including the next "//"
+    # terminator (or to end of file) and returns them, chomped, as one
+    # Stockholm entry.
+
+    my ( $fh,   # file handle
+       ) = @_;
+
+    # Returns a list (list context) or a list reference (scalar context).
+    # Returns undef / the empty list when no lines could be read.
+
+    my ( @lines );
+
+    return if not defined $fh;
+
+    # Test definedness, not truth: a final line consisting of "0" without
+    # a trailing newline is false and would otherwise be silently dropped.
+    while ( defined( my $line = <$fh> ) )
+    {
+        chomp $line;
+
+        push @lines, $line;
+
+        last if $line eq "//";
+    }
+
+    # Bare return, so that list-context callers receive an empty list
+    # instead of a one-element list holding undef.
+    return if not @lines;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+sub parse_stockholm_entry
+{
+    # Martin A. Hansen, February 2007.
+
+    # Given a Stockholm entry as a list of lines, parses it into a hash:
+    #   GF    => { feature => values joined with a space }
+    #   GS    => { seqname => { feature => [ values ] } }
+    #   GR    => { seqname => { feature => [ values ] } }
+    #   ALIGN => [ [ name, aligned sequence ], ... ] in order of first
+    #            appearance, followed by an SS_cons row and, when present,
+    #            an RF row taken from the #=GC lines.
+    # Compulsory fields:     AC ID DE AU SE SS BM GA TC NC TP SQ
+    # Non-compulsory fields: PI DC DR RC RN RM RT RA RL CC
+
+    my ( $entry,   # stockholm entry
+       ) = @_;
+
+    # Returns data structure (hash in list context, hashref otherwise).
+
+    my ( %hash, %align_hash, @align_list, @align );
+
+    foreach my $line ( @{ $entry } )
+    {
+        next if $line =~ /^# /;
+
+        if ( $line =~ /^#=GF\s+([^\s]+)\s+(.*)$/ )
+        {
+            push @{ $hash{ "GF" }{ $1 } }, $2;
+        }
+        elsif ( $line =~ /^#=GC\s+([^\s]+)\s+(.*)$/ )
+        {
+            push @{ $hash{ "GC" }{ $1 } }, $2;
+        }
+        elsif ( $line =~ /^#=GS\s+([^\s]+)\s+([^\s]+)\s+(.*)$/ )
+        {
+            push @{ $hash{ "GS" }{ $1 }{ $2 } }, $3;
+        }
+        elsif ( $line =~ /^#=GR\s+([^\s]+)\s+([^\s]+)\s+(.*)$/ )
+        {
+            push @{ $hash{ "GR" }{ $1 }{ $2 } }, $3;
+        }
+        elsif ( $line =~ /^([^\s]+)\s+(.+)$/ )
+        {
+            my ( $name, $chunk ) = ( $1, $2 );
+
+            # Remember the order in which sequence names first appear.
+            push @align_list, $name if not exists $align_hash{ $name };
+
+            # Rows for the same name are concatenated.
+            $align_hash{ $name } .= $chunk;
+        }
+    }
+
+    # Collapse the collected value lists into single strings.
+    foreach my $feature ( keys %{ $hash{ "GF" } } ) {
+        $hash{ "GF" }{ $feature } = join " ", @{ $hash{ "GF" }{ $feature } };
+    }
+
+    foreach my $feature ( keys %{ $hash{ "GC" } } ) {
+        $hash{ "GC" }{ $feature } = join "", @{ $hash{ "GC" }{ $feature } };
+    }
+
+    foreach my $name ( @align_list ) {
+        push @align, [ $name, $align_hash{ $name } ];
+    }
+
+    # The SS_cons row is always appended; the RF row only when it has a value.
+    push @align, [ "SS_cons", $hash{ "GC" }{ "SS_cons" } ];
+    push @align, [ "RF", $hash{ "GC" }{ "RF" } ] if $hash{ "GC" }{ "RF" };
+
+    delete $hash{ "GC" };
+
+    $hash{ "ALIGN" } = \@align;
+
+    return wantarray ? %hash : \%hash;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
--- /dev/null
+package Maasha::Test;
+
+
+use warnings;
+use strict;
+
+require Exporter;
+
+use vars qw( @ISA @EXPORT @EXPORT_OK );
+
+@ISA = qw( Exporter );
+
+@EXPORT_OK = qw(
+ hello_world
+);
+
+sub hello_world
+{
+    # Prints a greeting line to the currently selected output handle.
+    # (The message used to read "Hello Word".)
+
+    print "Hello World\n";
+}
+
+END {
+    # Prints the greeting once when the interpreter shuts down. Called
+    # with explicit parentheses: the `&name;` form implicitly forwards the
+    # current @_ to the callee.
+    hello_world();
+}
+
+# Explicit true value, so that loading the module does not depend on the
+# value of whichever statement happens to run last.
+1;
--- /dev/null
+package Maasha::TwoBit;
+
+# Copyright (C) 2008 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Stuff for interacting with the 2bit format as described here:
+# http://genome.ucsc.edu/FAQ/FAQformat#format7
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use warnings;
+use strict;
+use vars qw( @ISA @EXPORT );
+
+use Data::Dumper;
+
+use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "TMP_DIR" } );
+
+int find_block_beg( char *string, char c, int beg, int len )
+{
+    /* Martin A. Hansen, March 2008 */
+
+    /* Scans string[ beg .. len - 1 ] for the first occurrence of the */
+    /* char c. Returns its index, or -1 if c does not occur in that range. */
+
+    int pos = beg;
+
+    while ( pos < len )
+    {
+        if ( string[ pos ] == c ) {
+            return pos;
+        }
+
+        pos++;
+    }
+
+    return -1;
+}
+
+
+int find_block_len( char *string, char c, int beg, int len )
+{
+    /* Martin A. Hansen, March 2008 */
+
+    /* Counts how many consecutive chars equal to c start at position beg, */
+    /* never looking at or beyond position len. Returns that count, which */
+    /* is 0 when string[ beg ] is not c or beg is not below len. */
+
+    int end;
+
+    for ( end = beg; end < len && string[ end ] == c; end++ )
+    {
+        /* advance to the first position that breaks the run */
+    }
+
+    return end - beg;
+}
+
+
+/* 2-bit code per uppercase letter, indexed by letter - 'A': */
+/* T = 0, C = 1, A = 2, G = 3. 255 marks letters without a code. */
+char l2n[26] = { 2, 255, 1, 255, 255, 255, 3, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255 };
+
+void dna2bin( char *raw, int size )
+{
+    /* Khisanth from #perl March 2008 */
+
+    /* Encodes a DNA string to a bit array: every 4 bases are packed into */
+    /* one byte, first base in the two most significant bits. Only whole */
+    /* groups of 4 are encoded; trailing bases beyond size / 4 * 4 are */
+    /* ignored. The packed string is returned on the Perl stack. */
+
+    /* Characters without a 2-bit code (anything but A, C, G, T) are */
+    /* encoded as 0, i.e. like T. Previously they indexed outside l2n */
+    /* (e.g. lowercase letters) or spilled the 255 marker into the */
+    /* neighbouring bit fields. */
+
+    Inline_Stack_Vars;
+    int n_bytes = size > 0 ? size / 4 : 0;
+    int i, j;
+    unsigned char *packed;
+
+    /* One spare byte keeps the allocation non-empty when n_bytes is 0. */
+    packed = malloc( ( size_t ) n_bytes + 1 );
+
+    if ( packed == NULL ) {
+        croak( "dna2bin: out of memory" );
+    }
+
+    for ( i = 0; i < n_bytes; i++ )
+    {
+        unsigned char packed_value = 0;
+
+        for ( j = 0; j < 4; j++ )
+        {
+            char base = raw[ i * 4 + j ];
+            unsigned char code = 0;
+
+            if ( base >= 'A' && base <= 'Z' )
+            {
+                code = ( unsigned char ) l2n[ base - 'A' ];
+
+                if ( code > 3 ) {
+                    code = 0;
+                }
+            }
+
+            packed_value = ( unsigned char ) ( ( packed_value << 2 ) | code );
+        }
+
+        packed[ i ] = packed_value;
+    }
+
+    Inline_Stack_Reset;
+    Inline_Stack_Push( sv_2mortal( newSVpvn( ( char * ) packed, n_bytes ) ) );
+    Inline_Stack_Done;
+    free( packed );
+}
+
+
+/* Lookup table from a packed byte value (0 - 255) to the 4 bases it */
+/* encodes. Each base takes 2 bits with T = 0, C = 1, A = 2, G = 3; the */
+/* first base of the string sits in the two most significant bits, so */
+/* e.g. conv[ 0 ] is "TTTT" and conv[ 3 ] is "TTTG". */
+char *conv[256] = {
+ "TTTT", "TTTC", "TTTA", "TTTG", "TTCT", "TTCC", "TTCA", "TTCG", "TTAT",
+ "TTAC", "TTAA", "TTAG", "TTGT", "TTGC", "TTGA", "TTGG", "TCTT", "TCTC",
+ "TCTA", "TCTG", "TCCT", "TCCC", "TCCA", "TCCG", "TCAT", "TCAC", "TCAA",
+ "TCAG", "TCGT", "TCGC", "TCGA", "TCGG", "TATT", "TATC", "TATA", "TATG",
+ "TACT", "TACC", "TACA", "TACG", "TAAT", "TAAC", "TAAA", "TAAG", "TAGT",
+ "TAGC", "TAGA", "TAGG", "TGTT", "TGTC", "TGTA", "TGTG", "TGCT", "TGCC",
+ "TGCA", "TGCG", "TGAT", "TGAC", "TGAA", "TGAG", "TGGT", "TGGC", "TGGA",
+ "TGGG", "CTTT", "CTTC", "CTTA", "CTTG", "CTCT", "CTCC", "CTCA", "CTCG",
+ "CTAT", "CTAC", "CTAA", "CTAG", "CTGT", "CTGC", "CTGA", "CTGG", "CCTT",
+ "CCTC", "CCTA", "CCTG", "CCCT", "CCCC", "CCCA", "CCCG", "CCAT", "CCAC",
+ "CCAA", "CCAG", "CCGT", "CCGC", "CCGA", "CCGG", "CATT", "CATC", "CATA",
+ "CATG", "CACT", "CACC", "CACA", "CACG", "CAAT", "CAAC", "CAAA", "CAAG",
+ "CAGT", "CAGC", "CAGA", "CAGG", "CGTT", "CGTC", "CGTA", "CGTG", "CGCT",
+ "CGCC", "CGCA", "CGCG", "CGAT", "CGAC", "CGAA", "CGAG", "CGGT", "CGGC",
+ "CGGA", "CGGG", "ATTT", "ATTC", "ATTA", "ATTG", "ATCT", "ATCC", "ATCA",
+ "ATCG", "ATAT", "ATAC", "ATAA", "ATAG", "ATGT", "ATGC", "ATGA", "ATGG",
+ "ACTT", "ACTC", "ACTA", "ACTG", "ACCT", "ACCC", "ACCA", "ACCG", "ACAT",
+ "ACAC", "ACAA", "ACAG", "ACGT", "ACGC", "ACGA", "ACGG", "AATT", "AATC",
+ "AATA", "AATG", "AACT", "AACC", "AACA", "AACG", "AAAT", "AAAC", "AAAA",
+ "AAAG", "AAGT", "AAGC", "AAGA", "AAGG", "AGTT", "AGTC", "AGTA", "AGTG",
+ "AGCT", "AGCC", "AGCA", "AGCG", "AGAT", "AGAC", "AGAA", "AGAG", "AGGT",
+ "AGGC", "AGGA", "AGGG", "GTTT", "GTTC", "GTTA", "GTTG", "GTCT", "GTCC",
+ "GTCA", "GTCG", "GTAT", "GTAC", "GTAA", "GTAG", "GTGT", "GTGC", "GTGA",
+ "GTGG", "GCTT", "GCTC", "GCTA", "GCTG", "GCCT", "GCCC", "GCCA", "GCCG",
+ "GCAT", "GCAC", "GCAA", "GCAG", "GCGT", "GCGC", "GCGA", "GCGG", "GATT",
+ "GATC", "GATA", "GATG", "GACT", "GACC", "GACA", "GACG", "GAAT", "GAAC",
+ "GAAA", "GAAG", "GAGT", "GAGC", "GAGA", "GAGG", "GGTT", "GGTC", "GGTA",
+ "GGTG", "GGCT", "GGCC", "GGCA", "GGCG", "GGAT", "GGAC", "GGAA", "GGAG",
+ "GGGT", "GGGC", "GGGA", "GGGG"
+};
+
+
+void bin2dna( char *raw, int size )
+{
+    /* Khisanth from #perl, March 2008 */
+
+    /* Converts a bit array of size bytes to DNA, 4 bases per byte, by */
+    /* looking every byte up in the conv table. The resulting string of */
+    /* 4 * size bases is returned on the Perl stack. */
+
+    Inline_Stack_Vars;
+    int n_bytes = size > 0 ? size : 0;   /* a negative size is treated as empty */
+    int i;
+    char *unpacked;
+
+    unpacked = malloc( ( size_t ) 4 * n_bytes + 1 );
+
+    /* The allocation used to be dereferenced without this check. */
+    if ( unpacked == NULL ) {
+        croak( "bin2dna: out of memory" );
+    }
+
+    for ( i = 0; i < n_bytes; i++ )
+    {
+        /* Index with the byte value 0 - 255 even where char is signed. */
+        unsigned char conv_index = ( unsigned char ) raw[ i ];
+
+        memcpy( unpacked + ( size_t ) i * 4, conv[ conv_index ], 4 );
+    }
+
+    unpacked[ ( size_t ) 4 * n_bytes ] = 0;
+
+    Inline_Stack_Reset;
+    Inline_Stack_Push( sv_2mortal( newSVpvn( unpacked, ( size_t ) 4 * n_bytes ) ) );
+    Inline_Stack_Done;
+    free( unpacked );
+}
+
+
+void bin2dna_old( char *bin, int bin_len )
+{
+    /* Martin A. Hansen, March 2008 */
+
+    /* Converts a string of '0' and '1' chars to DNA, two chars per base: */
+    /* "00" -> T, "01" -> C, "10" -> A, "11" -> G (any char other than '1' */
+    /* counts as '0'). A trailing unpaired char is ignored. The DNA string */
+    /* is returned on the Perl stack. */
+
+    Inline_Stack_Vars;
+
+    int n_bases = bin_len > 0 ? bin_len / 2 : 0;
+    int c;
+    char *dna;
+
+    /* One spare byte keeps the allocation non-empty for inputs shorter */
+    /* than one base; the result of malloc used to go unchecked. */
+    dna = ( char* )( malloc( ( size_t ) n_bases + 1 ) );
+
+    if ( dna == NULL ) {
+        croak( "bin2dna_old: out of memory" );
+    }
+
+    for ( c = 0; c < n_bases; c++ )
+    {
+        int high = ( bin[ 2 * c ]     == '1' );
+        int low  = ( bin[ 2 * c + 1 ] == '1' );
+
+        if ( high ) {
+            dna[ c ] = low ? 'G' : 'A';
+        } else {
+            dna[ c ] = low ? 'C' : 'T';
+        }
+    }
+
+    Inline_Stack_Reset;
+    Inline_Stack_Push( sv_2mortal( newSVpvn( dna, n_bases ) ) );
+    Inline_Stack_Done;
+
+    free( dna );
+}
+
+
+void hard_mask( char *seq, int beg, int len, int sub_beg, int sub_len )
+{
+    /* Martin A. Hansen, March 2008 */
+
+    /* Hard masks (overwrites with 'N') the part of the block */
+    /* [ beg, beg + len ) that falls inside the window */
+    /* [ sub_beg, sub_beg + sub_len ). seq is modified in place and must */
+    /* hold the sub_len chars of the window, i.e. seq[ 0 ] corresponds to */
+    /* position sub_beg. Nothing happens if block and window do not overlap. */
+
+    /* The block is intersected with the window. Previously a block */
+    /* starting before the window was masked at its full length, and a */
+    /* block reaching past the window end was written beyond the buffer. */
+
+    int i, mask_beg, mask_end;
+
+    mask_beg = ( beg > sub_beg ) ? beg : sub_beg;
+    mask_end = ( beg + len < sub_beg + sub_len ) ? beg + len : sub_beg + sub_len;
+
+    for ( i = mask_beg; i < mask_end; i++ ) {
+        seq[ i - sub_beg ] = 'N';
+    }
+}
+
+
+void soft_mask( char *seq, int beg, int len, int sub_beg, int sub_len )
+{
+    /* Martin A. Hansen, March 2008 */
+
+    /* Soft masks the part of the block [ beg, beg + len ) that falls */
+    /* inside the window [ sub_beg, sub_beg + sub_len ) by flipping the */
+    /* ASCII case bit of every char (XOR with ' '), which turns uppercase */
+    /* letters into lowercase. seq is modified in place and must hold the */
+    /* sub_len chars of the window, i.e. seq[ 0 ] corresponds to position */
+    /* sub_beg. Nothing happens if block and window do not overlap. */
+
+    /* The block is intersected with the window. Previously a block */
+    /* starting before the window was masked at its full length, and a */
+    /* block reaching past the window end was written beyond the buffer. */
+
+    int i, mask_beg, mask_end;
+
+    mask_beg = ( beg > sub_beg ) ? beg : sub_beg;
+    mask_end = ( beg + len < sub_beg + sub_len ) ? beg + len : sub_beg + sub_len;
+
+    for ( i = mask_beg; i < mask_end; i++ ) {
+        seq[ i - sub_beg ] = seq[ i - sub_beg ] ^ ' ';
+    }
+}
+
+END_C
+
+
+use Maasha::Common;
+use Maasha::Fasta;
+use Maasha::Seq;
+
+use constant {
+ SEQ_NAME => 0,
+ SEQ => 1,
+};
+
+@ISA = qw( Exporter );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub twobit_get_TOC
+{
+    # Martin A. Hansen, March 2008.
+
+    # Reads the header and the table of contents (TOC) of a 2bit file,
+    # starting from the beginning of the file. Each TOC item is a
+    # [ sequence name, offset ] pair.
+
+    # The 2bit format is described here:
+    # http://genome.ucsc.edu/FAQ/FAQformat#format7
+
+    my ( $fh,   # filehandle
+       ) = @_;
+
+    # Returns AoA.
+
+    my ( @toc );
+
+    sysseek $fh, 0, 0;
+
+    # Header: four 32 bit fields. Only signature and sequence count are used.
+    my $signature = unpack_32bit( $fh );
+    my $version   = unpack_32bit( $fh );
+    my $seq_count = unpack_32bit( $fh );
+    my $reserved  = unpack_32bit( $fh );
+
+    if ( $signature != 0x1A412743 ) {
+        Maasha::Common::error( qq(2bit file signature didn't match - inverse bit order?) );
+    }
+
+    foreach my $entry_no ( 1 .. $seq_count )
+    {
+        my ( $name_size, $raw_name, $seq_name, $offset );
+
+        # Each item: 1 byte name size, the name itself, 32 bit record offset.
+        $name_size = unpack_8bit( $fh );
+
+        sysread $fh, $raw_name, $name_size;
+
+        $seq_name = unpack( "A$name_size", $raw_name );
+
+        $offset = unpack_32bit( $fh );
+
+        push @toc, [ $seq_name, $offset ];
+    }
+
+    return wantarray ? @toc : \@toc;
+}
+
+
+sub twobit_get_seq
+{
+    # Martin A. Hansen, March 2008.
+
+    # Given a filehandle to a 2bit file, gets the sequence
+    # or subsequence from a 2bit file entry at the given
+    # offset position. N blocks are always hard masked; soft mask
+    # blocks are applied only when the mask flag is set.
+
+    # The 2bit format is described here:
+    # http://genome.ucsc.edu/FAQ/FAQformat#format7
+
+    my ( $fh,        # filehandle
+         $offset,    # byte position
+         $sub_beg,   # begin of subsequence  - OPTIONAL
+         $sub_len,   # length of subsequence - OPTIONAL
+         $mask,      # retrieve soft mask information flag - OPTIONAL
+       ) = @_;
+
+    # Returns a string.
+
+    my ( $seq_len, $max_len, $n_count, @n_begs, @n_sizes, $m_count, @m_begs, @m_sizes, $reserved, $seq );
+
+    $sub_beg ||= 0;
+
+    sysseek $fh, $offset, 0;
+
+    $seq_len = unpack_32bit( $fh );
+
+    # Nothing to fetch when the window starts at or after the sequence end.
+    return "" if $sub_beg >= $seq_len;
+
+    # Clamp the window to what is left of the sequence after $sub_beg.
+    # Clamping to $seq_len alone let windows with $sub_beg > 0 run past
+    # the end of the record. A missing or zero length means "to the end".
+    $max_len = $seq_len - $sub_beg;
+    $sub_len = $max_len if not $sub_len or $sub_len > $max_len;
+
+    # The record stores all block begins first, then all block sizes.
+    $n_count = unpack_32bit( $fh );
+    @n_begs  = map { unpack_32bit( $fh ) } 1 .. $n_count;
+    @n_sizes = map { unpack_32bit( $fh ) } 1 .. $n_count;
+
+    $m_count = unpack_32bit( $fh );
+    @m_begs  = map { unpack_32bit( $fh ) } 1 .. $m_count;
+    @m_sizes = map { unpack_32bit( $fh ) } 1 .. $m_count;
+
+    $reserved = unpack_32bit( $fh );
+
+    # Skip the record header fields read above to reach the packed DNA.
+    $offset += 4 + 4 + $n_count * 8 + 4 + $m_count * 8 + 4;
+
+    $seq = unpack_dna( $fh, $offset, $sub_beg, $sub_len );
+
+    # hard_mask and soft_mask are called for their effect on $seq.
+    foreach my $n ( 0 .. $n_count - 1 )
+    {
+        hard_mask( $seq, $n_begs[ $n ], $n_sizes[ $n ], $sub_beg, $sub_len );
+
+        # Stop at the first block that begins beyond the window.
+        last if $sub_beg + $sub_len < $n_begs[ $n ];
+    }
+
+    if ( $mask )
+    {
+        foreach my $m ( 0 .. $m_count - 1 )
+        {
+            soft_mask( $seq, $m_begs[ $m ], $m_sizes[ $m ], $sub_beg, $sub_len );
+
+            last if $sub_beg + $sub_len < $m_begs[ $m ];
+        }
+    }
+
+    return $seq;
+}
+
+
+sub unpack_8bit
+{
+    # Martin A. Hansen, March 2008.
+
+    # Reads a single byte from the given filehandle with sysread
+    # and returns it as an unsigned integer.
+
+    # NB the swap flag is accepted but not used.
+
+    my ( $fh,     # filehandle
+         $swap,   # bit order swap flag - OPTIONAL
+       ) = @_;
+
+    # Returns integer.
+
+    my $byte;
+
+    sysread $fh, $byte, 1;
+
+    my $value = unpack( "C", $byte );
+
+    return $value;
+}
+
+
+sub unpack_32bit
+{
+    # Martin A. Hansen, March 2008.
+
+    # Reads 4 bytes from the given filehandle with sysread and returns
+    # them as an unsigned 32 bit integer, decoded little-endian ("V") by
+    # default or big-endian ("N") when the swap flag is set.
+
+    my ( $fh,     # filehandle
+         $swap,   # bit order swap flag - OPTIONAL
+       ) = @_;
+
+    # Returns integer.
+
+    my $template = $swap ? "N" : "V";
+    my $bytes;
+
+    sysread $fh, $bytes, 4;
+
+    my $value = unpack( $template, $bytes );
+
+    return $value;
+}
+
+
+sub unpack_dna
+{
+    # Martin A. Hansen, March 2008.
+
+    # Unpacks the DNA beginning at the given file offset.
+    # The DNA is packed to two bits per base, where the first
+    # base is in the most significant 2-bit byte; the last
+    # base is in the least significant 2 bits. The packed
+    # DNA field is padded with 0 bits as necessary to take
+    # an even multiple of 32 bits in the file.
+
+    # NB the swap flag is accepted but not used.
+
+    my ( $fh,       # filehandle
+         $offset,   # file offset
+         $beg,      # sequence beg
+         $len,      # sequence length
+         $swap,     # bit order swap flag - OPTIONAL
+       ) = @_;
+
+    # Returns a string.
+
+    my ( $bin, $bin_beg, $bin_end, $bin_len, $dna, $skip );
+
+    return "" if not $len or $len <= 0;
+
+    # First and last byte holding a wanted base (4 bases per byte). The
+    # byte count has to account for where $beg falls inside its first
+    # byte: deriving it from $len alone read too few bytes (or the wrong
+    # ones) whenever $beg was not a multiple of 4.
+    $bin_beg = int( $beg / 4 );
+    $bin_end = int( ( $beg + $len - 1 ) / 4 );
+    $bin_len = $bin_end - $bin_beg + 1;
+
+    sysseek $fh, $offset + $bin_beg, 0;
+    sysread $fh, $bin, $bin_len;
+
+    $bin = "" if not defined $bin;
+
+    # Hand over the number of bytes actually read, which may be smaller
+    # than requested at the end of the file.
+    $dna = bin2dna( $bin, length $bin );
+
+    # Drop the bases preceding $beg in the first byte and everything
+    # following the requested length.
+    $skip = $beg - $bin_beg * 4;
+
+    return "" if $skip >= length $dna;
+
+    return substr( $dna, $skip, $len );
+}
+
+
+sub fasta2twobit
+{
+    # Martin A. Hansen, March 2008.
+
+    # Converts a FASTA file to 2bit format in two passes: the first pass
+    # reads all entries to build an index of names, lengths and mask
+    # blocks; the second writes header, table of contents (TOC) and one
+    # record per entry, re-reading each sequence from the input handle.
+
+    # NOTE(review): the sequence positions used for re-reading are computed
+    # as name length + 2 bytes before the sequence and 1 byte after it,
+    # which presumably means ">name\n" + one sequence line + "\n" per
+    # entry - confirm against the FASTA files fed to this routine.
+
+    my ( $fh_in,    # file handle to FASTA file
+         $fh_out,   # output file handle - OPTIONAL
+         $mask,     # preserve soft masking - OPTIONAL
+       ) = @_;
+
+    # Returns nothing. Both file handles are closed on return.
+
+    my ( $seq_offset, $offset, $entry, $mask_index, $seq_len, $seq_name_len, $pack_len, $rec_len, $index, $bin, $seq );
+
+    $fh_out = \*STDOUT if not $fh_out;
+
+    # ---- Creating content index ----
+
+    $index      = [];   # stays empty for input without entries instead of being undef
+    $seq_offset = 0;    # offset for reading sequence from FASTA file
+    $offset     = 16;   # offset starting after header line which is 16 bytes
+
+    while ( $entry = Maasha::Fasta::get_entry( $fh_in ) )
+    {
+        $seq_len      = length $entry->[ SEQ ];
+        $seq_name_len = length $entry->[ SEQ_NAME ];
+
+        $mask_index = mask_locate( $entry->[ SEQ ], $mask );
+
+        # Must agree with the padding applied by pack_dna.
+        $pack_len = ( $seq_len + ( 4 - ( $seq_len ) % 4 ) ) / 4;
+
+        $rec_len = (
+              4                                   # Sequence length
+            + 4                                   # N blocks
+            + 4 * $mask_index->{ "N_COUNT" }      # N begins
+            + 4 * $mask_index->{ "N_COUNT" }      # N lengths
+            + 4                                   # M blocks
+            + 4 * $mask_index->{ "M_COUNT" }      # M begins
+            + 4 * $mask_index->{ "M_COUNT" }      # M lengths
+            + 4                                   # reserved
+            + $pack_len                           # Packed DNA in bytes
+        );
+
+        push @{ $index }, {
+            SEQ_NAME     => $entry->[ SEQ_NAME ],
+            SEQ_NAME_LEN => $seq_name_len,
+            SEQ_BEG      => $seq_offset + $seq_name_len + 2,
+            SEQ_LEN      => $seq_len,
+            N_COUNT      => $mask_index->{ "N_COUNT" },
+            N_BEGS       => $mask_index->{ "N_BEGS" },
+            N_LENS       => $mask_index->{ "N_LENS" },
+            M_COUNT      => $mask_index->{ "M_COUNT" },
+            M_BEGS       => $mask_index->{ "M_BEGS" },
+            M_LENS       => $mask_index->{ "M_LENS" },
+            REC_LEN      => $rec_len,
+        };
+
+        # Every entry adds one TOC item, pushing the first record further back.
+        $offset += (
+              1                # 1 byte SEQ_NAME size
+            + $seq_name_len    # SEQ_NAME depending on SEQ_NAME size
+            + 4                # 32 bit offset position of sequence record
+        );
+
+        $seq_offset += $seq_name_len + 2 + $seq_len + 1;
+    }
+
+    # ---- Printing Header ----
+
+    $bin = pack( "V4", 0x1A412743, 0, scalar @{ $index }, 0 );   # signature, version, sequence count and reserved
+
+    print $fh_out $bin;
+
+    # ---- Printing TOC ----
+
+    $bin = "";
+
+    foreach $entry ( @{ $index } )
+    {
+        $bin .= pack( "C", $entry->{ "SEQ_NAME_LEN" } );                               # 1 byte SEQ_NAME size
+        $bin .= pack( "A" . $entry->{ "SEQ_NAME_LEN" }, $entry->{ "SEQ_NAME" } );      # SEQ_NAME depending on SEQ_NAME size
+        $bin .= pack( "V", $offset );                                                  # 32 bit offset position of sequence record
+
+        $offset += $entry->{ "REC_LEN" };
+    }
+
+    print $fh_out $bin;
+
+    # ---- Printing Records ----
+
+    foreach $entry ( @{ $index } )
+    {
+        # Record header: sequence length, N blocks, M blocks, reserved.
+        $bin = pack(
+            "V*",
+            $entry->{ "SEQ_LEN" },
+            $entry->{ "N_COUNT" },
+            @{ $entry->{ "N_BEGS" } },
+            @{ $entry->{ "N_LENS" } },
+            $entry->{ "M_COUNT" },
+            @{ $entry->{ "M_BEGS" } },
+            @{ $entry->{ "M_LENS" } },
+            0,
+        );
+
+        sysseek $fh_in, $entry->{ "SEQ_BEG" }, 0;
+        sysread $fh_in, $seq, $entry->{ "SEQ_LEN" };
+
+        # Ambiguity codes are stored as T; their positions are kept in the N blocks.
+        $seq = uc $seq;
+        $seq =~ tr/RYWSMKHDVBN/TTTTTTTTTTT/;
+
+        $bin .= pack_dna( $seq );
+
+        print $fh_out $bin;
+    }
+
+    close $fh_in;
+    close $fh_out;
+}
+
+
+sub pack_dna
+{
+    # Martin A. Hansen, March 2008.
+
+    # Packs a DNA sequence into a bit array, two bits per base,
+    # represented as so: T - 00, C - 01, A - 10, G - 11. The first base is
+    # in the most significant 2-bit byte; the last base is in the least
+    # significant 2 bits. For example, the sequence TCAG is represented as
+    # 00011011.
+
+    # Before packing, the sequence is extended with 1 to 4 Ts (which
+    # encode as 0 bits) up to the next multiple of 4 bases; a sequence
+    # that already is a multiple of 4 long gets 4 extra Ts.
+
+    my ( $dna,   # dna string to pack
+       ) = @_;
+
+    # Returns bit array
+
+    my $padding = 4 - ( length( $dna ) % 4 );
+
+    $dna .= "T" x $padding;
+
+    my $bin = dna2bin( $dna, length $dna );
+
+    return $bin;
+}
+
+
+sub mask_locate
+{
+    # Martin A. Hansen, March 2008.
+
+    # Locates N-blocks and M-blocks in a given sequence. The sequence is
+    # first recoded so that lowercase a, t, c, g and u become M, and n plus
+    # the ambiguity codes (either case) become N. Blocks are continuous
+    # stretches of Ns or Ms, and the begins and lengths of these blocks are
+    # saved in a hash along with the count of each block type. M-blocks
+    # are only collected when the mask flag is set.
+
+    my ( $seq,    # Sequence
+         $mask,   # preserve soft masking flag - OPTIONAL
+       ) = @_;
+
+    # Returns a hash.
+
+    my ( $want_n, $want_m, $seq_len, $pos, $n_beg, $m_beg, @n_begs, @n_lens, @m_begs, @m_lens, %mask_hash );
+
+    $seq =~ tr/atcgunRYWSMKHDVBrywsmkhdvb/MMMMMNNNNNNNNNNNNNNNNNNNNN/;
+
+    $want_n = 1;            # always mask Ns.
+    $want_m = $mask || 0;
+
+    $seq_len = length $seq;
+    $pos     = 0;
+
+    while ( $want_n or $want_m )
+    {
+        # Find the next block of each type still being searched for; a
+        # type is switched off once no block of it remains after $pos.
+        if ( $want_n )
+        {
+            $n_beg = find_block_beg( $seq, "N", $pos, $seq_len );
+
+            $want_n = 0 if $n_beg < 0;
+        }
+
+        if ( $want_m )
+        {
+            $m_beg = find_block_beg( $seq, "M", $pos, $seq_len );
+
+            $want_m = 0 if $m_beg < 0;
+        }
+
+        last if not $want_n and not $want_m;
+
+        # Record whichever block comes first and continue right after it.
+        if ( $want_n and ( not $want_m or $n_beg < $m_beg ) )
+        {
+            my $n_len = find_block_len( $seq, "N", $n_beg, $seq_len );
+
+            push @n_begs, $n_beg;
+            push @n_lens, $n_len;
+
+            $pos = $n_beg + $n_len;
+        }
+        else
+        {
+            my $m_len = find_block_len( $seq, "M", $m_beg, $seq_len );
+
+            push @m_begs, $m_beg;
+            push @m_lens, $m_len;
+
+            $pos = $m_beg + $m_len;
+        }
+    }
+
+    %mask_hash = (
+        N_COUNT => scalar @n_begs,
+        N_BEGS  => [ @n_begs ],
+        N_LENS  => [ @n_lens ],
+        M_COUNT => scalar @m_begs,
+        M_BEGS  => [ @m_begs ],
+        M_LENS  => [ @m_lens ],
+    );
+
+    return wantarray ? %mask_hash : \%mask_hash;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+1;
--- /dev/null
+package Maasha::UCSC;
+
+# Copyright (C) 2007 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# Stuff for interacting with UCSC genome browser
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use strict;
+use vars qw ( @ISA @EXPORT );
+
+use Data::Dumper;
+use Time::HiRes qw( gettimeofday );
+
+use Maasha::Common;
+use Maasha::Calc;
+use Maasha::Matrix;
+
+use constant {
+ CHR_BEG => 0,
+ NEXT_CHR_BEG => 1,
+ CHR_END => 2,
+ INDEX_BEG => 3,
+ INDEX_LEN => 4,
+};
+
+@ISA = qw( Exporter );
+
+my $TIME = gettimeofday();
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BED format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# http://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#BED
+
+
+sub bed_get_entry
+{
+    # Martin A. Hansen, December 2007.
+
+    # Reads the next line from a filehandle and parses it as a BED entry
+    # with 3, 4, 5, 6 or 12 columns. End positions (CHR_END, THICK_END)
+    # are stored as the file value minus 1. Any other column count is
+    # reported through Maasha::Common::error.
+
+    my ( $fh,        # file handle
+         $columns,   # number of BED columns to read - OPTIONAL
+       ) = @_;
+
+    # Returns hashref (hash in list context); nothing at end of file.
+
+    my ( $line, @fields, @names, %entry );
+
+    $line = <$fh>;
+
+    # Check for end of file before touching the line.
+    return if not defined $line;
+
+    $line =~ s/(\n|\r)$//g;   # some people have carriage returns in their BED files -> Grrrr
+
+    @fields = split "\t", $line;
+
+    $columns ||= scalar @fields;
+
+    # Hash keys in BED column order.
+    @names = qw( CHR CHR_BEG CHR_END Q_ID SCORE STRAND THICK_BEG THICK_END ITEMRGB BLOCKCOUNT BLOCKSIZES Q_BEGS );
+
+    if ( grep { $columns == $_ } 3, 4, 5, 6, 12 )
+    {
+        my $last = $columns - 1;
+
+        @entry{ @names[ 0 .. $last ] } = @fields[ 0 .. $last ];
+
+        $entry{ "CHR_END" }   = $fields[ 2 ] - 1;
+        $entry{ "THICK_END" } = $fields[ 7 ] - 1 if $columns == 12;
+    }
+    else
+    {
+        Maasha::Common::error( qq(Bad BED format in line->$line<-) );
+    }
+
+    $entry{ "REC_TYPE" } = "BED";
+    $entry{ "BED_LEN" }  = $entry{ "CHR_END" } - $entry{ "CHR_BEG" } + 1;
+    $entry{ "BED_COLS" } = $columns;
+
+    return wantarray ? %entry : \%entry;
+}
+
+
+sub bed_get_entries
+{
+    # Martin A. Hansen, January 2008.
+
+    # Given a path to a BED file, reads in all entries
+    # and returns them.
+
+    my ( $path,      # full path to BED file
+         $columns,   # number of columns in BED file - OPTIONAL (but is faster)
+       ) = @_;
+
+    # Returns a list (list reference in scalar context).
+
+    my ( $fh, $entry, @list );
+
+    $fh = Maasha::Common::read_open( $path );
+
+    # Pass the column count on to the parser; it used to be dropped
+    # here, so the argument had no effect.
+    while ( $entry = bed_get_entry( $fh, $columns ) ) {
+        push @list, $entry;
+    }
+
+    close $fh;
+
+    return wantarray ? @list : \@list;
+}
+
+
+sub bed_put_entry
+{
+    # Martin A. Hansen, September 2007.
+
+    # Writes a BED entry as one tab separated line. With 3, 4, 5 or 6
+    # columns exactly those columns are written. With any other column
+    # count (default 12) the first 6 columns are written, followed by
+    # those of the remaining columns that are defined in the record.
+    # End positions (CHR_END, THICK_END) are written as the record value
+    # plus 1, whitespace in Q_ID is replaced by underscores and the
+    # decimal part of SCORE is cut off.
+
+    # The record itself is left untouched: the clean-up of Q_ID and SCORE
+    # is done on copies. It used to be done in place, altering the
+    # caller's hash (and adding the keys when they were missing).
+
+    # NB, this could really be more robust!?
+
+    my ( $record,    # hashref
+         $fh,        # file handle - OPTIONAL
+         $columns,   # number of columns in BED file - OPTIONAL (but is faster)
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $width, $q_id, $score, @fields );
+
+    $columns ||= 12;   # max number of columns possible
+
+    # Everything except 3, 4, 5 and 6 is handled as the full format.
+    $width = ( grep { $columns == $_ } 3, 4, 5, 6 ) ? $columns : 12;
+
+    push @fields, $record->{ "CHR" };
+    push @fields, $record->{ "CHR_BEG" };
+    push @fields, $record->{ "CHR_END" } + 1;
+
+    if ( $width >= 4 )
+    {
+        $q_id = $record->{ "Q_ID" };
+        $q_id =~ s/\s+/_/g if defined $q_id;
+
+        push @fields, $q_id;
+    }
+
+    if ( $width >= 5 )
+    {
+        $score = $record->{ "SCORE" };
+        $score =~ s/\.\d*// if defined $score;
+
+        push @fields, $score;
+    }
+
+    if ( $width >= 6 ) {
+        push @fields, $record->{ "STRAND" };
+    }
+
+    if ( $width == 12 )
+    {
+        push @fields, $record->{ "THICK_BEG" }     if defined $record->{ "THICK_BEG" };
+        push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" };
+        push @fields, $record->{ "ITEMRGB" }       if defined $record->{ "ITEMRGB" };
+        push @fields, $record->{ "BLOCKCOUNT" }    if defined $record->{ "BLOCKCOUNT" };
+        push @fields, $record->{ "BLOCKSIZES" }    if defined $record->{ "BLOCKSIZES" };
+        push @fields, $record->{ "Q_BEGS" }        if defined $record->{ "Q_BEGS" };
+    }
+
+    if ( $fh ) {
+        print $fh join( "\t", @fields ), "\n";
+    } else {
+        print join( "\t", @fields ), "\n";
+    }
+}
+
+
+sub bed_put_entries
+{
+    # Martin A. Hansen, January 2008.
+
+    # Writes a list of BED entries, one line per entry, by handing each
+    # entry to bed_put_entry. Without a file handle the entries go to
+    # the currently selected output handle.
+
+    my ( $entries,   # list of entries,
+         $fh,        # file handle - OPTIONAL
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $entry );
+
+    # A plain loop instead of map, since only the side effect is wanted.
+    foreach $entry ( @{ $entries } ) {
+        &bed_put_entry( $entry, $fh );
+    }
+
+    return;
+}
+
+
+sub bed_analyze
+{
+    # Martin A. Hansen, March 2008.
+
+    # Given a BED record, analyze it to give information about
+    # intron/exon sizes. The statistics are stored in the record itself
+    # under the keys EXONS, EXON_LEN_<n>, EXON_LEN_-1 (the last exon),
+    # EXON_MAX_LEN, EXON_MIN_LEN and EXON_MEAN_LEN plus INTRONS and, if
+    # there is at least one intron, the corresponding INTRON_* keys.
+
+    # A record without blocks (BLOCKCOUNT missing or 0) only gets EXONS
+    # and INTRONS set to 0, since no lengths can be calculated.
+
+    my ( $entry,   # BED entry
+       ) = @_;
+
+    # Returns hashref.
+
+    my ( $i, @begs, @lens, $exon_max, $exon_min, $exon_len, $exon_tot, $intron_max, $intron_min, $intron_len, $intron_tot );
+
+    if ( not $entry->{ "BLOCKCOUNT" } )
+    {
+        # Bail out before the mean calculation divides by zero.
+        $entry->{ "EXONS" }   = 0;
+        $entry->{ "INTRONS" } = 0;
+
+        return wantarray ? %{ $entry } : $entry;
+    }
+
+    $exon_max   = 0;
+    $exon_min   = 9999999999;
+    $exon_tot   = 0;
+    $intron_max = 0;
+    $intron_min = 9999999999;
+    $intron_tot = 0;
+
+    $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" };
+
+    @begs = split /,/, $entry->{ "Q_BEGS" };
+    @lens = split /,/, $entry->{ "BLOCKSIZES" };
+
+    for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
+    {
+        $exon_len = $lens[ $i ];
+
+        $entry->{ "EXON_LEN_$i" } = $exon_len;
+
+        $exon_max = $exon_len if $exon_len > $exon_max;
+        $exon_min = $exon_len if $exon_len < $exon_min;
+
+        $exon_tot += $exon_len;
+    }
+
+    $entry->{ "EXON_LEN_-1" }   = $exon_len;   # length of the last exon
+    $entry->{ "EXON_MAX_LEN" }  = $exon_max;
+    $entry->{ "EXON_MIN_LEN" }  = $exon_min;
+    $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } );
+
+    $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1;
+    $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0;
+
+    if ( $entry->{ "INTRONS" } )
+    {
+        for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ )
+        {
+            # An intron is the gap between the end of one block and the beginning of the next.
+            $intron_len = $begs[ $i ] - ( $begs[ $i - 1 ] + $lens[ $i - 1 ] );
+
+            $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len;
+
+            $intron_max = $intron_len if $intron_len > $intron_max;
+            $intron_min = $intron_len if $intron_len < $intron_min;
+
+            $intron_tot += $intron_len;
+        }
+
+        $entry->{ "INTRON_LEN_-1" }   = $intron_len;   # length of the last intron
+        $entry->{ "INTRON_MAX_LEN" }  = $intron_max;
+        $entry->{ "INTRON_MIN_LEN" }  = $intron_min;
+        $entry->{ "INTRON_MEAN_LEN" } = int( $intron_tot / $entry->{ "INTRONS" } );
+    }
+
+    return wantarray ? %{ $entry } : $entry;
+}
+
+
+sub bed_sort
+{
+    # Martin A. Hansen, March 2008.
+
+    # Sort a potential huge BED file according to
+    # CHR, CHR_BEG and optionally STRAND.
+
+    # The file is first split into one part file per chromosome (or per
+    # chromosome and strand) in the temporary directory. Each part is
+    # then sorted on CHR_BEG in memory and appended to a combined file,
+    # which finally replaces the original file.
+
+    my ( $tmp_dir,   # temporary directory used for sorting
+         $file,      # BED file to sort
+         $strand,    # flag to sort on strand - OPTIONAL
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $fh_in, $key, $fh_out, %fh_hash, $part_file, $entry, $entries );
+
+    $fh_in = &Maasha::Common::read_open( $file );
+
+    while ( $entry = &bed_get_entry( $fh_in ) )
+    {
+        if ( $strand ) {
+            $key = join "_", $entry->{ "CHR" }, $entry->{ "STRAND" };
+        } else {
+            $key = $entry->{ "CHR" };
+        }
+
+        $fh_hash{ $key } = &Maasha::Common::write_open( "$tmp_dir/$key.sort" ) if not exists $fh_hash{ $key };
+
+        &bed_put_entry( $entry, $fh_hash{ $key } );
+    }
+
+    close $fh_in;
+
+    # Close the handles themselves (the hash values - the keys are only
+    # names), so all buffered entries are on disk before the part files
+    # are read back below.
+    foreach $fh_out ( values %fh_hash ) {
+        close $fh_out;
+    }
+
+    $fh_out = &Maasha::Common::write_open( "$tmp_dir/temp.sort" );
+
+    foreach $part_file ( sort keys %fh_hash )
+    {
+        $entries = &bed_get_entries( "$tmp_dir/$part_file.sort" );
+
+        @{ $entries } = sort { $a->{ "CHR_BEG" } <=> $b->{ "CHR_BEG" } } @{ $entries };
+
+        foreach $entry ( @{ $entries } ) {
+            &bed_put_entry( $entry, $fh_out );
+        }
+
+        unlink "$tmp_dir/$part_file.sort";
+    }
+
+    close $fh_out;
+
+    # Do not silently leave the original file unsorted if the replace fails.
+    rename "$tmp_dir/temp.sort", $file or &Maasha::Common::error( qq(Could not rename "$tmp_dir/temp.sort" to "$file": $!) );
+}
+
+
+sub bed_merge_entries
+{
+    # Martin A. Hansen, February 2008.
+
+    # Merges a list of BED entries into one 12 column entry, where the
+    # blocks of the given entries (or the whole entry, if it has no
+    # Q_BEGS) become the blocks of the new entry. All entries must be
+    # from the same chromosome and strand.
+
+    # NB, the given list is sorted in place on CHR_BEG.
+
+    my ( $entries,   # list of BED entries to be merged
+       ) = @_;
+
+    # Returns hash.
+
+    my ( $idx, $entry, @ids, @starts, @sizes, @all_starts, @all_sizes, %merged );
+
+    @{ $entries } = sort { $a->{ "CHR_BEG" } <=> $b->{ "CHR_BEG" } } @{ $entries };
+
+    $idx = 0;
+
+    while ( $idx < @{ $entries } )
+    {
+        $entry = $entries->[ $idx ];
+
+        if ( $entries->[ 0 ]->{ "CHR" } ne $entry->{ "CHR" } ) {
+            &Maasha::Common::error( qq(Attempted merge of BED entries from different chromosomes) );
+        }
+
+        if ( $entries->[ 0 ]->{ "STRAND" } ne $entry->{ "STRAND" } ) {
+            &Maasha::Common::error( qq(Attempted merge of BED entries from different strands) );
+        }
+
+        push @ids, $entry->{ "Q_ID" } || sprintf( "ID%06d", $idx );
+
+        if ( exists $entry->{ "Q_BEGS" } )
+        {
+            @starts = split ",", $entry->{ "Q_BEGS" };
+            @sizes  = split ",", $entry->{ "BLOCKSIZES" };
+        }
+        else
+        {
+            # An entry without blocks counts as one block spanning the entry.
+            @starts = ( 0 );
+            @sizes  = ( $entry->{ "CHR_END" } - $entry->{ "CHR_BEG" } + 1 );
+        }
+
+        # Collect the block begins as chromosome positions for now.
+        push @all_starts, map { $_ + $entry->{ "CHR_BEG" } } @starts;
+        push @all_sizes, @sizes;
+
+        $idx++;
+    }
+
+    # Make the block begins relative to the begin of the merged entry.
+    @all_starts = map { $_ - $entries->[ 0 ]->{ "CHR_BEG" } } @all_starts;
+
+    $merged{ "CHR" }        = $entries->[ 0 ]->{ "CHR" };
+    $merged{ "CHR_BEG" }    = $entries->[ 0 ]->{ "CHR_BEG" };
+    $merged{ "CHR_END" }    = $entries->[ -1 ]->{ "CHR_END" };
+    $merged{ "REC_TYPE" }   = "BED";
+    $merged{ "BED_LEN" }    = $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1;
+    $merged{ "BED_COLS" }   = 12;
+    $merged{ "Q_ID" }       = join( ":", @ids );
+    $merged{ "SCORE" }      = 999;
+    $merged{ "STRAND" }     = $entries->[ 0 ]->{ "STRAND" } || "+";
+    $merged{ "THICK_BEG" }  = $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" };
+    $merged{ "THICK_END" }  = $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" };
+    $merged{ "ITEMRGB" }    = "0,0,0";
+    $merged{ "BLOCKCOUNT" } = scalar @all_starts;
+    $merged{ "BLOCKSIZES" } = join( ",", @all_sizes );
+    $merged{ "Q_BEGS" }     = join( ",", @all_starts );
+
+    return wantarray ? %merged : \%merged;
+}
+
+
+sub bed_split_entry
+{
+    # Martin A. Hansen, February 2008.
+
+    # Splits a given BED entry into a list of blocks, which are
+    # returned as 6 column BED entries. The block number is appended
+    # to the Q_ID of each block. An entry without block information
+    # is returned unchanged as the only element of the list.
+
+    my ( $entry,   # BED entry hashref
+       ) = @_;
+
+    # Returns a list.
+
+    my ( @q_begs, @blocksizes, $block, @blocks, $i );
+
+    if ( exists $entry->{ "BLOCKCOUNT" } )
+    {
+        @q_begs     = split ",", $entry->{ "Q_BEGS" };
+        @blocksizes = split ",", $entry->{ "BLOCKSIZES" };
+
+        for ( $i = 0; $i < @q_begs; $i++ )
+        {
+            # Block begins are relative to the begin of the entry.
+            $block = {
+                CHR     => $entry->{ "CHR" },
+                CHR_BEG => $entry->{ "CHR_BEG" } + $q_begs[ $i ],
+                CHR_END => $entry->{ "CHR_BEG" } + $q_begs[ $i ] + $blocksizes[ $i ] - 1,
+                Q_ID    => $entry->{ "Q_ID" } . sprintf( "_%03d", $i ),
+                SCORE   => $entry->{ "SCORE" },
+                STRAND  => $entry->{ "STRAND" },
+            };
+
+            $block->{ "BED_LEN" }  = $block->{ "CHR_END" } - $block->{ "CHR_BEG" } + 1;
+            $block->{ "BED_COLS" } = 6;
+            $block->{ "REC_TYPE" } = "BED";
+
+            push @blocks, $block;
+        }
+    }
+    else
+    {
+        # The entry is a hashref, so it is the single block itself.
+        @blocks = ( $entry );
+    }
+
+    return wantarray ? @blocks : \@blocks;
+}
+
+
+
+sub bed_overlap
+{
+    # Martin A. Hansen, February 2008.
+
+    # Checks if two BED entries overlap and
+    # return 1 if so - else 0;
+
+    # Entries on different chromosomes never overlap. Entries on
+    # different strands do not overlap either, unless the no_strand
+    # flag is set, in which case the strand is disregarded.
+
+    my ( $entry1,      # hashref
+         $entry2,      # hashref
+         $no_strand,   # don't check strand flag - OPTIONAL
+       ) = @_;
+
+    # Returns boolean.
+
+    return 0 if $entry1->{ "CHR" } ne $entry2->{ "CHR" };
+    return 0 if ( not $no_strand ) and ( $entry1->{ "STRAND" } ne $entry2->{ "STRAND" } );
+
+    # Both ends are inclusive, so entries sharing a single position overlap.
+    if ( $entry1->{ "CHR_END" } < $entry2->{ "CHR_BEG" } or $entry1->{ "CHR_BEG" } > $entry2->{ "CHR_END" } ) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+
+sub bed_upload_to_ucsc
+{
+ # Martin A. Hansen, September 2007.
+
+ # Upload a BED file to the UCSC database.
+
+ # The upload is done by running hgLoadBed with the database and table
+ # names taken from the options hash. If the "sec_struct" option is set,
+ # a table definition with the extra secStr and conf columns is written
+ # to a temporary SQL file and given to hgLoadBed with -sqlTable.
+
+ my ( $tmp_dir, # temporary directory
+ $file, # file to upload,
+ $options, # argument hashref
+ $append, # flag indicating table should be appended
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $args, $table, $sql_file, $fh_out, $fh_in );
+
+ # With the append flag set -oldTable is added to the hgLoadBed arguments.
+ if ( $append ) {
+ $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", "-oldTable", $file;
+ } else {
+ $args = join " ", $options->{ "database" }, $options->{ "table" }, "-tmpDir=$tmp_dir", $file;
+ }
+
+ if ( $options->{ "sec_struct" } )
+ {
+ $table = $options->{ "table" };
+
+ &Maasha::Common::error( "Attempt to load secondary structure track without 'rnaSecStr' in table name" ) if not $table =~ /rnaSecStr/;
+
+ $sql_file = "$tmp_dir/upload_RNA_SS.sql";
+
+ $fh_out = &Maasha::Common::write_open( $sql_file );
+
+ # The table definition below is written verbatim to the SQL file.
+ print $fh_out qq(
+CREATE TABLE $table (
+ bin smallint not null, # Bin number for browser speedup
+ chrom varchar(255) not null, # Chromosome or FPC contig
+ chromStart int unsigned not null, # Start position in chromosome
+ chromEnd int unsigned not null, # End position in chromosome
+ name varchar(255) not null, # Name of item
+ score int unsigned not null, # Score from 0-1000
+ strand char(1) not null, # + or -
+ size int unsigned not null, # Size of element.
+ secStr longblob not null, # Parentheses and '.'s which define the secondary structure
+ conf longblob not null, # Confidence of secondary-structure annotation per position (0.0-1.0).
+ #Indices
+ INDEX(name(16)),
+ INDEX(chrom(8), bin),
+ INDEX(chrom(8), chromStart)
+);
+ );
+
+ close $fh_out;
+
+ # NOTE(review): this command is built separately and does not use $args,
+ # so the append flag has no effect for secondary structure tracks - confirm
+ # that this is intended.
+ &Maasha::Common::run( "hgLoadBed", "-notItemRgb -sqlTable=$sql_file $options->{ 'database' } $options->{ 'table' } -tmpDir=$tmp_dir $file > /dev/null 2>&1" );
+
+ unlink $sql_file;
+ }
+ else
+ {
+ # All output from hgLoadBed is discarded.
+ &Maasha::Common::run( "hgLoadBed", "$args > /dev/null 2>&1" );
+ }
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub psl_get_entries
+{
+    # Martin A. Hansen, February 2008.
+
+    # Reads all entries of a PSL file into records. The first five
+    # lines of the file are skipped, and each remaining line must hold
+    # 21 tab separated fields. Q_END and S_END are stored one lower
+    # than in the file, and a SCORE is calculated for each record.
+
+    my ( $path,   # full path to PSL file
+       ) = @_;
+
+    # Returns a list of records (hashrefs).
+
+    my ( $fh, @lines, $line, @names, @values, %record, @records );
+
+    # Record keys in the order of the fields on a PSL line.
+    @names = qw( MATCHES MISMATCHES REPMATCHES NCOUNT QNUMINSERT QBASEINSERT SNUMINSERT SBASEINSERT
+                 STRAND Q_ID Q_LEN Q_BEG Q_END S_ID S_LEN S_BEG S_END BLOCKCOUNT BLOCKSIZES Q_BEGS S_BEGS );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    @lines = <$fh>;
+
+    close $fh;
+
+    chomp @lines;
+
+    foreach $line ( @lines[ 5 .. $#lines ] )
+    {
+        @values = split "\t", $line;
+
+        &Maasha::Common::error( qq(Bad PSL format in file "$path") ) if @values != 21;
+
+        %record = ( REC_TYPE => "PSL" );
+
+        @record{ @names } = @values;
+
+        $record{ "Q_END" } -= 1;
+        $record{ "S_END" } -= 1;
+
+        $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" };
+
+        push @records, { %record };
+    }
+
+    return wantarray ? @records : \@records;
+}
+
+
+sub psl_put_header
+{
+ # Martin A. Hansen, September 2007.
+
+ # Write a PSL header to file.
+
+ # The header text is printed exactly as given in the literal below.
+ # Without a file handle the header is written to STDOUT.
+
+ my ( $fh, # file handle - OPTIONAL
+ ) = @_;
+
+ # Returns nothing.
+
+ $fh = \*STDOUT if not $fh;
+
+ print $fh qq(psLayout version 3
+match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStart match match count bases count bases name size start end name size start end count
+---------------------------------------------------------------------------------------------------------------------------------------------------------------
+);
+}
+
+
+sub psl_put_entry
+{
+    # Martin A. Hansen, September 2007.
+
+    # Writes a PSL record as one line of 21 tab separated fields.
+    # Q_END and S_END are written incremented by one. Without a file
+    # handle the line is written to STDOUT.
+
+    my ( $record,   # hashref
+         $fh,       # file handle - OPTIONAL
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $key, @row );
+
+    $fh = \*STDOUT if not $fh;
+
+    # The keys are listed in the order of the fields on a PSL line.
+    foreach $key ( qw( MATCHES MISMATCHES REPMATCHES NCOUNT QNUMINSERT QBASEINSERT SNUMINSERT SBASEINSERT
+                       STRAND Q_ID Q_LEN Q_BEG Q_END S_ID S_LEN S_BEG S_END BLOCKCOUNT BLOCKSIZES Q_BEGS S_BEGS ) )
+    {
+        if ( $key eq "Q_END" or $key eq "S_END" ) {
+            push @row, $record->{ $key } + 1;
+        } else {
+            push @row, $record->{ $key };
+        }
+    }
+
+    print $fh join( "\t", @row ), "\n";
+}
+
+
+sub psl_upload_to_ucsc
+{
+    # Martin A. Hansen, September 2007.
+
+    # Uploads a PSL file to the UCSC database by running hgLoadPsl
+    # with the database and table names from the options hash. All
+    # output from hgLoadPsl is discarded.
+
+    my ( $file,      # file to upload,
+         $options,   # argument hashref
+         $append,    # flag indicating table should be appended
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( @parts, $args );
+
+    @parts = ( $options->{ "database" }, "-table=" . $options->{ "table" }, "-clientLoad" );
+
+    push @parts, "-append" if $append;
+    push @parts, $file;
+
+    $args = join " ", @parts;
+
+    &Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" );
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> TRACK FILE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub update_my_tracks
+{
+    # Martin A. Hansen, September 2007.
+
+    # Update the /home/user/ucsc/my_tracks.ra file and executes ucscMakeTracks.pl
+
+    # The track file is first copied to a backup with "~" appended to
+    # its name. Then a track description built from the options hash
+    # is appended to the track file. Tracks of type "sec_struct" get a
+    # fixed "bed 6 +" type and a mafTrack line, while all other tracks
+    # get the given type and the optional useScore and maxHeightPixels
+    # lines.
+
+    my ( $options,   # hashref
+         $type,      # track type
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $file, $fh_in, $fh_out, $line, $time, $sec_struct, @track );
+
+    $file = $ENV{ "HOME" } . "/ucsc/my_tracks.ra";
+
+    # ---- create a backup ----
+
+    $fh_in  = &Maasha::Common::read_open( $file );
+    $fh_out = &Maasha::Common::write_open( "$file~" );
+
+    print $fh_out $line while $line = <$fh_in>;
+
+    close $fh_in;
+    close $fh_out;
+
+    # ---- append track ----
+
+    $time = &Maasha::Common::time_stamp();
+
+    $fh_out = &Maasha::Common::append_open( $file );
+
+    $sec_struct = ( $type eq "sec_struct" );
+
+    push @track, "\n\n# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
+    push @track, "\n# Track added by 'upload_to_ucsc' $time\n\n";
+    push @track, "# Database " . $options->{ 'database' } . "\n\n";
+    push @track, "track " . $options->{ 'table' } . "\n";
+    push @track, "shortLabel " . $options->{ 'short_label' } . "\n";
+    push @track, "longLabel " . $options->{ 'long_label' } . "\n";
+    push @track, "group " . $options->{ 'group' } . "\n";
+    push @track, "priority " . $options->{ 'priority' } . "\n";
+    push @track, "useScore 1\n" if not $sec_struct and $options->{ 'use_score' };
+    push @track, "visibility " . $options->{ 'visibility' } . "\n";
+    push @track, "maxHeightPixels 50:50:11\n" if not $sec_struct and $type eq "wig 0";
+    push @track, "color " . $options->{ 'color' } . "\n";
+
+    if ( $sec_struct ) {
+        push @track, "type bed 6 +\n", "mafTrack multiz17way\n";
+    } else {
+        push @track, "type $type\n";
+    }
+
+    push @track, "\n# //\n";
+
+    foreach $line ( @track ) {
+        print $fh_out $line;
+    }
+
+    close $fh_out;
+
+    &Maasha::Common::run( "ucscMakeTracks.pl", "-b > /dev/null 2>&1" );
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub phastcons_get_entry
+{
+    # Martin A. Hansen, December 2007.
+
+    # Given a file handle to a PhastCons file get the
+    # next entry which is all the lines after a "fixedStep"
+    # line and until the next "fixedStep" line or EOF.
+
+    # The first line of the returned entry is the locator line with
+    # the leading "fixedStep" keyword removed.
+
+    my ( $fh,   # filehandle
+       ) = @_;
+
+    # Returns a list of lines, or nothing at EOF or on an empty entry.
+
+    my ( $entry, @lines );
+
+    # Read one whole entry at a time by using the keyword as record separator.
+    local $/ = "\nfixedStep ";
+
+    $entry = <$fh>;
+
+    return if not defined $entry;   # EOF
+
+    chomp $entry;
+
+    @lines = split "\n", $entry;
+
+    return if @lines == 0;
+
+    # Only the very first entry in a file still carries the keyword,
+    # for the following entries it was removed with the separator.
+    $lines[ 0 ] =~ s/^fixedStep\s*//;
+
+    return wantarray ? @lines : \@lines;
+}
+
+
+sub phastcons_parse_entry
+{
+ # Martin A. Hansen, December 2007.
+
+ # Given a PhastCons entry converts this to a
+ # list of super blocks.
+
+ # The first line of the entry is the locator line, and the remaining
+ # lines are scores. A block is a stretch of scores at or above the
+ # "threshold" argument, where stretches of lower scores no longer than
+ # the "gap" argument are included in the block. Only blocks of at least
+ # "min" scores are kept. Blocks no further apart than "dist" are grouped
+ # into a super block, and each super block is output as one entry with
+ # the keys of a 12 column BED entry.
+
+ # NB, the locator line is removed from the given list of lines.
+
+ my ( $lines, # list of lines
+ $args, # argument hash
+ ) = @_;
+
+ # Returns a list of entries (hashrefs).
+
+ my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
+
+ $info = shift @{ $lines };
+
+ # NOTE(review): if the locator line does not match, $chr and $beg are
+ # left undefined and parsing continues regardless.
+ if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
+ {
+ $chr = $1;
+ $beg = $2;
+ $step = $3;
+
+ die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
+ }
+
+ # ---- locate blocks: $i is the first score of a block, $c scans forward ----
+
+ $i = 0;
+
+ while ( $i < @{ $lines } )
+ {
+ if ( $lines->[ $i ] >= $args->{ "threshold" } )
+ {
+ $c = $i + 1;
+
+ while ( $c < @{ $lines } )
+ {
+ if ( $lines->[ $c ] < $args->{ "threshold" } )
+ {
+ # $j is moved to the first score after the stretch of low scores.
+ $j = $c + 1;
+
+ while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
+ $j++;
+ }
+
+ # A too long stretch of low scores ends the block at $c.
+ if ( $j - $c > $args->{ "gap" } )
+ {
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ # presumably the -1 converts the 1-based start of the locator to 0-based - TODO confirm
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $j;
+
+ last;
+ }
+
+ # The stretch of low scores is short enough to be part of the block.
+ $c = $j
+ }
+ else
+ {
+ $c++;
+ }
+ }
+
+ # Reached both when the inner loop ran to the end of the scores and
+ # after the "last" above, where $i was moved past $c.
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $c;
+ }
+ else
+ {
+ $i++;
+ }
+ }
+
+ # ---- group blocks lying no more than "dist" apart into super blocks ----
+
+ $i = 0;
+
+ while ( $i < @blocks )
+ {
+ $c = $i + 1;
+
+ while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
+ {
+ $c++;
+ }
+
+ push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
+
+ $i = $c;
+ }
+
+ # ---- create one entry per super block ----
+
+ foreach $super_block ( @super_blocks )
+ {
+ foreach $block ( @{ $super_block } )
+ {
+ # Block begins are relative to the begin of the super block.
+ push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
+ push @lens, $block->{ "CHR_LEN" } - 1;
+ }
+
+ # NOTE(review): all block sizes but the last are one less than CHR_LEN - confirm that this is intended.
+ $lens[ -1 ]++;
+
+ push @entries, {
+ CHR => $super_block->[ 0 ]->{ "CHR" },
+ CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
+ Q_ID => "Q_ID",
+ SCORE => 100,
+ STRAND => "+",
+ THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
+ ITEMRGB => "0,200,100",
+ BLOCKCOUNT => scalar @{ $super_block },
+ BLOCKSIZES => join( ",", @lens ),
+ Q_BEGS => join( ",", @begs ),
+ };
+
+ undef @begs;
+ undef @lens;
+ }
+
+ return wantarray ? @entries : \@entries;
+}
+
+
+sub phastcons_index_create
+{
+    # Martin A. Hansen, January 2008.
+
+    # Indexes a concatenated PhastCons file.
+    # The index consists of a hash with chromosomes as keys,
+    # and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
+
+    # The file offsets are calculated and not read from the file: the
+    # locator line counts as its own length plus 11, and each score
+    # line counts as 6.
+
+    my ( $path,   # path to PhastCons file
+       ) = @_;
+
+    # Returns a hashref
+
+    my ( $fh, $offset, $entry, $locator, $chr, $beg, $step, $count, $data_beg, $blocks, $i, %index );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    $offset = 0;
+
+    while ( $entry = &Maasha::UCSC::phastcons_get_entry( $fh ) )
+    {
+        $locator = shift @{ $entry };
+        $count   = scalar @{ $entry };   # number of scores in this entry
+
+        if ( $locator !~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
+        {
+            &Maasha::Common::error( qq(Could not parse PhastCons locator: $locator) );
+        }
+        else
+        {
+            $chr  = $1;
+            $beg  = $2 - 1;   # phastcons files are 1-based
+            $step = $3;
+        }
+
+        $data_beg = $offset + length( $locator ) + 11;
+        $offset   = $data_beg + 6 * $count;
+
+        push @{ $index{ $chr } }, [ $beg, undef, $beg + $count - 1, $data_beg, $offset - $data_beg ];
+    }
+
+    close $fh;
+
+    # Fill in where the next block of each chromosome begins. The last
+    # block of a chromosome gets the position after its own end.
+    foreach $chr ( keys %index )
+    {
+        $blocks = $index{ $chr };
+
+        foreach $i ( 0 .. $#{ $blocks } - 1 ) {
+            $blocks->[ $i ]->[ NEXT_CHR_BEG ] = $blocks->[ $i + 1 ]->[ 0 ];
+        }
+
+        $blocks->[ -1 ]->[ NEXT_CHR_BEG ] = $blocks->[ -1 ]->[ CHR_END ] + 1;
+    }
+
+    return wantarray ? %index : \%index;
+}
+
+
+sub phastcons_index_store
+{
+    # Martin A. Hansen, January 2008.
+
+    # Stores a PhastCons index in a file by handing the path and the
+    # index to Maasha::Common::file_store.
+
+    my ( $path,    # full path to file
+         $index,   # list with index
+       ) = @_;
+
+    # returns nothing
+
+    return &Maasha::Common::file_store( $path, $index );
+}
+
+
+sub phastcons_index_retrieve
+{
+    # Martin A. Hansen, January 2008.
+
+    # Reads a PhastCons index back from a file by means of
+    # Maasha::Common::file_retrieve.
+
+    my ( $path,   # full path to file
+       ) = @_;
+
+    # returns the index as a hash in list context - else as a hashref
+
+    my ( $stored );
+
+    $stored = &Maasha::Common::file_retrieve( $path );
+
+    return %{ $stored } if wantarray;
+
+    return $stored;
+}
+
+
+sub phastcons_index_lookup
+{
+ # Martin A. Hansen, January 2008.
+
+ # Retrieve PhastCons scores from a indexed
+ # Phastcons file given a chromosome and
+ # begin and end positions.
+
+ # One score is returned per position in the interval, after the
+ # interval is extended with the flank at both ends. All scores start
+ # out as 0 and are overwritten with the values read from the data file.
+
+ # NOTE(review): CHR_BEG, CHR_END, NEXT_CHR_BEG and INDEX_BEG are
+ # constants defined elsewhere in this file - presumably the positions
+ # of the fields in an index block, confirm.
+
+ my ( $index, # data structure
+ $fh, # filehandle to datafile
+ $chr, # chromosome
+ $chr_beg, # chromosome beg
+ $chr_end, # chromosome end
+ $flank, # include flanking region - OPTIONAL
+ ) = @_;
+
+ # Returns a list
+
+ my ( $index_beg, $index_end, $i, $c, $beg, $end, @vals, $scores );
+
+ $flank ||= 0;
+
+ $chr_beg -= $flank;
+ $chr_end += $flank;
+
+# print "chr_beg->$chr_beg chr_end->$chr_end flank->$flank\n";
+
+ if ( exists $index->{ $chr } )
+ {
+ # presumably the search returns the number of the index block where
+ # the position lies between fields 0 and 1 (chr_beg and next_chr_beg),
+ # and a negative number if there is no such block - TODO confirm
+ # against Maasha::Matrix::interval_search.
+ $index_beg = &Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_beg );
+
+ if ( $index_beg < 0 ) {
+ &Maasha::Common::error( qq(Index search failed - begin index position doesn't exists: $chr_beg) );
+ }
+
+ # No second search is needed if the end lies before field 1 of the begin block.
+ if ( $chr_end < $index->{ $chr }->[ $index_beg ]->[ 1 ] )
+ {
+ $index_end = $index_beg;
+ }
+ else
+ {
+ $index_end = &Maasha::Matrix::interval_search( $index->{ $chr }, 0, 1, $chr_end );
+
+ if ( $index_end < 0 ) {
+ &Maasha::Common::error( qq(Index search failed - end index position doesn't exists: $chr_end) );
+ }
+ }
+
+ # Positions without data in the file keep the score 0.
+ map { $scores->[ $_ ] = 0 } 0 .. $chr_end - $chr_beg;
+
+ if ( $index_beg == $index_end )
+ {
+ # ---- the whole interval lies within one index block ----
+
+ $beg = &Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] );
+ $end = &Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ CHR_END ] );
+
+ if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ CHR_END ] and $end >= $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] )
+ {
+ # assumes each score takes up 6 bytes in the data file - TODO confirm
+ @vals = split "\n", &Maasha::Common::file_read(
+ $fh,
+ $index->{ $chr }->[ $index_beg ]->[ INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ),
+ 6 * ( $end - $beg + 1 ),
+ );
+ }
+
+ for ( $c = 0; $c < @vals; $c++ ) {
+ $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
+ }
+ }
+ else
+ {
+ # ---- the interval spans several index blocks ----
+
+ $beg = &Maasha::Calc::max( $chr_beg, $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] );
+
+# print Dumper( $beg, $index->{ $chr }->[ $index_beg ] );
+# print Dumper( "next", $index->{ $chr }->[ $index_beg ]->[ NEXT_CHR_BEG ] );
+
+ # beg next
+ # v v
+ # |||||||||.......
+
+ # Scores from the begin position to the end of the first block.
+ if ( $beg <= $index->{ $chr }->[ $index_beg ]->[ CHR_END ] )
+ {
+ @vals = split "\n", &Maasha::Common::file_read(
+ $fh,
+ $index->{ $chr }->[ $index_beg ]->[ INDEX_BEG ] + 6 * ( $beg - $index->{ $chr }->[ $index_beg ]->[ CHR_BEG ] ),
+ 6 * ( $index->{ $chr }->[ $index_beg ]->[ CHR_END ] - $beg + 1 ),
+ );
+
+ for ( $c = 0; $c < @vals; $c++ ) {
+ $scores->[ $c + $beg - $chr_beg ] = $vals[ $c ];
+ }
+ }
+
+ $end = &Maasha::Calc::min( $chr_end, $index->{ $chr }->[ $index_end ]->[ CHR_END ] );
+
+ # Scores from the beginning of the last block to the end position.
+ if ( $end <= $index->{ $chr }->[ $index_end ]->[ CHR_END ] )
+ {
+ @vals = split "\n", &Maasha::Common::file_read(
+ $fh,
+ $index->{ $chr }->[ $index_end ]->[ INDEX_BEG ],
+ 6 * ( $end - $index->{ $chr }->[ $index_end ]->[ CHR_BEG ] + 1 ),
+ );
+
+ for ( $c = 0; $c < @vals; $c++ ) {
+ $scores->[ $c + $index->{ $chr }->[ $index_end ]->[ CHR_BEG ] - $chr_beg ] = $vals[ $c ];
+ }
+ }
+
+ # All scores from the blocks between the first and the last block.
+ for ( $i = $index_beg + 1; $i <= $index_end - 1; $i++ )
+ {
+ @vals = split "\n", &Maasha::Common::file_read(
+ $fh,
+ $index->{ $chr }->[ $i ]->[ INDEX_BEG ],
+ 6 * ( $index->{ $chr }->[ $i ]->[ CHR_END ] - $index->{ $chr }->[ $i ]->[ CHR_BEG ] + 1 ),
+ );
+
+ for ( $c = 0; $c < @vals; $c++ ) {
+ $scores->[ $c + $index->{ $chr }->[ $i ]->[ CHR_BEG ] - $chr_beg ] = $vals[ $c ];
+ }
+ }
+ }
+ }
+ else
+ {
+ &Maasha::Common::error( qq(Chromosome "$chr" was not found in index) );
+ }
+
+ return wantarray ? @{ $scores } : $scores;
+}
+
+
+sub phastcons_normalize
+{
+    # Martin A. Hansen, January 2008.
+
+    # Normalizes a list of lists with PhastCons scores, so that all
+    # lists end up with the same number of scores. The common length
+    # is the rounded down midpoint between the shortest and the longest
+    # list. Shorter lists are inflated and longer lists are deflated,
+    # and the lists are changed in place.
+
+    my ( $AoA,   # AoA with PhastCons scores
+       ) = @_;
+
+    # Returns AoA.
+
+    my ( $row, $size, $shortest, $longest, $target );
+
+    $shortest = 99999999;
+    $longest  = 0;
+
+    foreach $row ( @{ $AoA } )
+    {
+        $size = scalar @{ $row };
+
+        $shortest = $size if $size < $shortest;
+        $longest  = $size if $size > $longest;
+    }
+
+    $target = int( ( $shortest + $longest ) / 2 );
+
+    foreach $row ( @{ $AoA } )
+    {
+        $size = scalar @{ $row };
+
+        if ( $size < $target ) {
+            &phastcons_list_inflate( $row, $target - $size );
+        } elsif ( $size > $target ) {
+            &phastcons_list_deflate( $row, $size - $target );
+        }
+    }
+
+    return wantarray ? @{ $AoA } : $AoA;
+}
+
+
+sub phastcons_list_inflate
+{
+    # Martin A. Hansen, January 2008.
+
+    # Inflates a list in place with a given number of extra elements,
+    # which are inserted at regular intervals over the list. The value
+    # of an extra element is the mean of the elements found just
+    # before and just after the insert position.
+
+    my ( $list,   # list of elements
+         $diff,   # number of elements to introduce
+       ) = @_;
+
+    # Returns nothing
+
+    my ( $before, $interval, $round, $at, $value );
+
+    $before   = scalar @{ $list };
+    $interval = $before / $diff;
+
+    $round = 0;
+
+    while ( $round < $diff )
+    {
+        $at    = int( ( $interval / 2 ) + $round * $interval );
+        $value = ( $list->[ $at - 1 ] + $list->[ $at + 1 ] ) / 2;
+
+        splice @{ $list }, $at, 0, $value;
+
+        $round++;
+    }
+
+    die qq(ERROR: bad inflate\n) if scalar @{ $list } != $before + $diff;
+}
+
+
+sub phastcons_list_deflate
+{
+    # Martin A. Hansen, January 2008.
+
+    # Deflates a list in place by removing a given number of elements
+    # at regular intervals over the list.
+
+    my ( $list,   # list of elements
+         $diff,   # number of elements to remove
+       ) = @_;
+
+    # Returns nothing
+
+    my ( $before, $interval, $round, $at );
+
+    $before   = scalar @{ $list };
+    $interval = ( $before - $diff ) / $diff;
+
+    $round = 0;
+
+    while ( $round < $diff )
+    {
+        $at = int( ( $interval / 2 ) + $round * $interval );
+
+        splice @{ $list }, $at, 1;
+
+        $round++;
+    }
+
+    die qq(ERROR: bad deflate\n) if scalar @{ $list } != $before - $diff;
+}
+
+
+sub phastcons_mean
+{
+    # Martin A. Hansen, January 2008.
+
+    # Given a normalized PhastCons matrix in an AoA, the matrix is
+    # flipped with Maasha::Matrix::matrix_flip, and the mean is
+    # calculated for each list in the flipped matrix.
+
+    my ( $AoA,   # AoA with normalized PhastCons scores
+       ) = @_;
+
+    # Returns a list
+
+    my ( $flipped, @means );
+
+    $flipped = &Maasha::Matrix::matrix_flip( $AoA );
+
+    @means = map { &Maasha::Calc::mean( $_ ) } @{ $flipped };
+
+    return wantarray ? @means : \@means;
+}
+
+
+sub phastcons_median
+{
+    # Martin A. Hansen, January 2008.
+
+    # Given a normalized PhastCons matrix in an AoA, the matrix is
+    # flipped with Maasha::Matrix::matrix_flip, and the median is
+    # calculated for each list in the flipped matrix.
+
+    my ( $AoA,   # AoA with normalized PhastCons scores
+       ) = @_;
+
+    # Returns a list
+
+    my ( $flipped, @medians );
+
+    $flipped = &Maasha::Matrix::matrix_flip( $AoA );
+
+    @medians = map { &Maasha::Calc::median( $_ ) } @{ $flipped };
+
+    return wantarray ? @medians : \@medians;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MULTIPLE ALIGNMENT FILES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub maf_extract
+{
+    # Martin A. Hansen, April 2008.
+
+    # Runs mafFrag to extract a subalignment from a multiz track
+    # in the UCSC genome browser database. The result is written to
+    # a temporary file, which is parsed and then deleted again.
+
+    my ( $tmp_dir,    # temporary directory
+         $database,   # genome database
+         $table,      # table with the multiz track
+         $chr,        # chromosome
+         $beg,        # begin position
+         $end,        # end position
+         $strand,     # strand
+       ) = @_;
+
+    # Returns a list of record
+
+    my ( $maf_file, $arguments, $records );
+
+    $maf_file  = "$tmp_dir/maf_extract.maf";
+    $arguments = join " ", $database, $table, $chr, $beg, $end, $strand, $maf_file;
+
+    &Maasha::Common::run( "mafFrag", $arguments );
+
+    $records = &maf_parse( $maf_file );
+
+    unlink $maf_file;
+
+    return @{ $records } if wantarray;
+
+    return $records;
+}
+
+
+sub maf_parse
+{
+    # Martin A. Hansen, April 2008.
+
+    # Parses a MAF file into a list of records. Only lines beginning
+    # with "s" are used. These are split on single spaces, where the
+    # second field is the sequence name and the last field is the
+    # sequence.
+
+    my ( $path,   # full path to MAF file
+       ) = @_;
+
+    # Returns a list of record.
+
+    my ( $fh, $line, @parts, %record, @records );
+
+    $fh = &Maasha::Common::read_open( $path );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $line !~ /^s/;
+
+        @parts = split / /, $line;
+
+        %record = (
+            SEQ_NAME  => $parts[ 1 ],
+            SEQ       => $parts[ -1 ],
+            ALIGN     => 1,
+            ALIGN_LEN => length $parts[ -1 ],
+        );
+
+        push @records, { %record };
+    }
+
+    close $fh;
+
+    return wantarray ? @records : \@records;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> WIGGLE FORMAT <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub fixedstep_put_entry
+{
+    # Martin A. Hansen, April 2008.
+
+    # Outputs a block of fixedStep values.
+    # Used for outputting wiggle data.
+
+    # A locator line with a step of 1 is written followed by one line
+    # per score. The scores are incremented by one and written as
+    # integers. Without a file handle the block is written to the
+    # currently selected output handle.
+
+    my ( $chr,     # chromosome
+         $beg,     # start position
+         $block,   # list of scores
+         $fh,      # filehandle - OPTIONAL
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $score );
+
+    $beg += 1;   # fixedStep format is 1 based.
+
+    if ( $fh )
+    {
+        print $fh "fixedStep chrom=$chr start=$beg step=1\n";
+
+        # Plain loops instead of map, since only the side effect is wanted.
+        foreach $score ( @{ $block } ) {
+            printf( $fh "%d\n", ( $score + 1 ) );
+        }
+    }
+    else
+    {
+        print "fixedStep chrom=$chr start=$beg step=1\n";
+
+        foreach $score ( @{ $block } ) {
+            printf( "%d\n", ( $score + 1 ) );
+        }
+    }
+
+    return;
+}
+
+
+sub wiggle_upload_to_ucsc
+{
+    # Martin A. Hansen, May 2008.
+
+    # Uploads a wiggle file to the UCSC database by running
+    # hgLoadWiggle from within the temporary directory. All output
+    # from the command is discarded.
+
+    my ( $tmp_dir,    # temporary directory
+         $wib_dir,    # wib directory
+         $wig_file,   # file to upload,
+         $options,    # argument hashref
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( $command );
+
+    $command = join " ",
+        "cd", $tmp_dir, "&&",
+        "hgLoadWiggle", "-tmpDir=$tmp_dir", "-pathPrefix=$wib_dir",
+        $options->{ 'database' }, $options->{ 'table' }, $wig_file,
+        "> /dev/null 2>&1";
+
+    `$command`;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> MySQL CONF <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub ucsc_get_user
+{
+    # Martin A. Hansen, May 2008
+
+    # Looks up the MySQL database user name in the .hg.conf file in
+    # the users home directory. The name is whatever follows
+    # "db.user=" on the first line beginning with this.
+
+    # Returns a string.
+
+    my ( $conf, $fh, $line, $user );
+
+    $conf = $ENV{ 'HOME' } . "/.hg.conf";
+
+    $fh = &Maasha::Common::read_open( $conf );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $line !~ /^db\.user=(.+)/;
+
+        $user = $1;
+
+        last;
+    }
+
+    close $fh;
+
+    return $user;
+}
+
+
+sub ucsc_get_password
+{
+    # Martin A. Hansen, May 2008
+
+    # Looks up the MySQL database password in the .hg.conf file in
+    # the users home directory. The password is whatever follows
+    # "db.password=" on the first line beginning with this.
+
+    # Returns a string.
+
+    my ( $conf, $fh, $line, $password );
+
+    $conf = $ENV{ 'HOME' } . "/.hg.conf";
+
+    $fh = &Maasha::Common::read_open( $conf );
+
+    while ( $line = <$fh> )
+    {
+        chomp $line;
+
+        next if $line !~ /^db\.password=(.+)/;
+
+        $password = $1;
+
+        last;
+    }
+
+    close $fh;
+
+    return $password;
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
+
--- /dev/null
+New developers should add their own subdirectory here for their own Perl modules:
+
+ ../biopieces/code_perl/Maasha # this is my Perl modules directory.
+ ../biopieces/code_perl/<new developer> # this could be your new modules directory.
+
+After creating your new subdirectory you can write some Perl modules and put them there:
+ ../biopieces/code_perl/<new developer>/<your module 1>.pm
+ ../biopieces/code_perl/<new developer>/<your module 2>.pm
+ ...
+
+Finally, add your new Perl modules directory to the PERL5LIB path in the configuration:
+ ../biopieces/bp_conf/bashrc # look inside this file for information on how to do that.
+
+
+All done.
+
+
+Martin A. Hansen, July 2008.