]> git.donarmstrong.com Git - biopieces.git/commitdiff
added read_genbank
author: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 8 Dec 2010 14:40:29 +0000 (14:40 +0000)
committer: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 8 Dec 2010 14:40:29 +0000 (14:40 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1170 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/read_genbank [new file with mode: 0755]
bp_test/in/read_genbank.in [new file with mode: 0644]
bp_test/in/read_genbank.in.gz [new file with mode: 0644]
bp_test/out/read_genbank.out.1 [new file with mode: 0644]
bp_test/out/read_genbank.out.2 [new file with mode: 0644]
bp_test/out/read_genbank.out.3 [new file with mode: 0644]
bp_test/out/read_genbank.out.4 [new file with mode: 0644]
bp_test/test/test_read_genbank [new file with mode: 0755]
code_ruby/Maasha/lib/genbank.rb [new file with mode: 0644]
code_ruby/Maasha/test/test_genbank.rb [new file with mode: 0755]

diff --git a/bp_bin/read_genbank b/bp_bin/read_genbank
new file mode 100755 (executable)
index 0000000..6e471bb
--- /dev/null
@@ -0,0 +1,79 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Read Genbank entries from one or more files.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'biopieces'
+require 'genbank'
+
+casts = []
+casts << {:long=>'data_in',    :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'num',        :short=>'n', :type=>'uint',   :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'keys',       :short=>'k', :type=>'list',   :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'features',   :short=>'f', :type=>'list',   :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'qualifiers', :short=>'q', :type=>'list',   :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+hash_keys  = options[:keys].inject(Hash.new)       { |h,k| h[k.upcase.to_sym] = true; h } if options[:keys]
+hash_feats = options[:features].inject(Hash.new)   { |h,k| h[k.upcase.to_sym] = true; h } if options[:features]
+hash_quals = options[:qualifiers].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:qualifiers]
+
+bp.each_record do |record|
+  bp.puts record
+end
+
+num  = 0
+last = false
+
+if options.has_key? :data_in
+  options[:data_in].each do |file|
+    Genbank.open(file, mode='r') do |gb|
+      gb.each(hash_keys, hash_feats, hash_quals) do |entry|
+        bp.puts entry
+
+        num += 1
+
+        if options.has_key? :num and options[:num] == num
+          last = true
+          break
+        end
+      end
+    end
+
+    break if last
+  end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
diff --git a/bp_test/in/read_genbank.in b/bp_test/in/read_genbank.in
new file mode 100644 (file)
index 0000000..87f75d4
--- /dev/null
@@ -0,0 +1,165 @@
+LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p
+            (AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION   U49845
+VERSION     U49845.1  GI:1293613
+KEYWORDS    .
+SOURCE      Saccharomyces cerevisiae (baker's yeast)
+  ORGANISM  Saccharomyces cerevisiae
+            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
+            Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+REFERENCE   1  (bases 1 to 5028)
+  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.
+  TITLE     Cloning and sequence of REV7, a gene whose function is required for
+            DNA damage-induced mutagenesis in Saccharomyces cerevisiae
+  JOURNAL   Yeast 10 (11), 1503-1509 (1994)
+  PUBMED    7871890
+REFERENCE   2  (bases 1 to 5028)
+  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.
+  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel
+            plasma membrane glycoprotein
+  JOURNAL   Genes Dev. 10 (7), 777-793 (1996)
+  PUBMED    8846915
+REFERENCE   3  (bases 1 to 5028)
+  AUTHORS   Roemer,T.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New
+            Haven, CT, USA
+FEATURES             Location/Qualifiers
+     source          1..5028
+                     /organism="Saccharomyces cerevisiae"
+                     /db_xref="taxon:4932"
+                     /chromosome="IX"
+                     /map="9"
+     CDS             <1..206
+                     /codon_start=3
+                     /product="TCP1-beta"
+                     /protein_id="AAA98665.1"
+                     /db_xref="GI:1293614"
+                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
+                     AEVLLRVDNIIRARPRTANRQHM"
+     gene            687..3158
+                     /gene="AXL2"
+     CDS             687..3158
+                     /gene="AXL2"
+                     /note="plasma membrane glycoprotein"
+                     /codon_start=1
+                     /function="required for axial budding pattern of S.
+                     cerevisiae"
+                     /product="Axl2p"
+                     /protein_id="AAA98666.1"
+                     /db_xref="GI:1293615"
+                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
+                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
+                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
+                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
+                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
+                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
+                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
+                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
+                     NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
+                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
+                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
+                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
+                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
+                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
+                     VDFSNKSNVNVGQVKDIHGRIPEML"
+     gene            complement(3300..4037)
+                     /gene="REV7"
+     CDS             complement(3300..4037)
+                     /gene="REV7"
+                     /codon_start=1
+                     /product="Rev7p"
+                     /protein_id="AAA98667.1"
+                     /db_xref="GI:1293616"
+                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
+                     FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
+                     KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
+                     RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
+                     LISGDDKILNGVYSQYEEGESIFGSLF"
+ORIGIN
+        1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
+       61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
+      121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
+      181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg
+      241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa
+      301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa
+      361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat
+      421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga
+      481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc
+      541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga
+      601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta
+      661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag
+      721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa
+      781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata
+      841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga
+      901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac
+      961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg
+     1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc
+     1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa
+     1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca
+     1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac
+     1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa
+     1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag
+     1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct
+     1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac
+     1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa
+     1561 acttattgga tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc
+     1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata
+     1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca
+     1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc
+     1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc
+     1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca
+     1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc
+     1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg
+     2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt
+     2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc
+     2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg
+     2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca
+     2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata
+     2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg
+     2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga
+     2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt
+     2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat
+     2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt
+     2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc
+     2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag
+     2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta
+     2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa
+     2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact
+     2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt
+     3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa
+     3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag
+     3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct
+     3181 taattttatt ttcctgtttt attttttatt agtggtttac agatacccta tattttattt
+     3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact
+     3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa
+     3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg
+     3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt
+     3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc
+     3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca
+     3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc
+     3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc
+     3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat
+     3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa
+     3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga
+     3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat
+     3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc
+     4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc
+     4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa
+     4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg
+     4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc
+     4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt
+     4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg
+     4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg
+     4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt
+     4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt
+     4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat
+     4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc
+     4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct
+     4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta
+     4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg actctctaac
+     4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct
+     4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct
+     4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc
+//
diff --git a/bp_test/in/read_genbank.in.gz b/bp_test/in/read_genbank.in.gz
new file mode 100644 (file)
index 0000000..b09274e
Binary files /dev/null and b/bp_test/in/read_genbank.in.gz differ
diff --git a/bp_test/out/read_genbank.out.1 b/bp_test/out/read_genbank.out.1
new file mode 100644 (file)
index 0000000..bc821ee
--- /dev/null
@@ -0,0 +1,117 @@
+ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+DB_XREF: taxon:4932
+CHROMOSOME: IX
+MAP: 9
+FEATURE: source
+LOCATOR: 1..5028
+LOCUS: SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION: U49845
+VERSION: U49845.1  GI:1293613
+KEYWORDS: .
+SOURCE: Saccharomyces cerevisiae (baker's yeast)
+REFERENCE: 1  (bases 1 to 5028);2  (bases 1 to 5028);3  (bases 1 to 5028)
+AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T.
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission
+JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA
+PUBMED: 7871890;8846915
+SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatc
attggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagc
attttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat
+---
+PRODUCT: TCP1-beta
+PROTEIN_ID: AAA98665.1
+DB_XREF: GI:1293614
+TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM
+FEATURE: CDS
+LOCATOR: <1..206
+LOCUS: SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION: U49845
+VERSION: U49845.1  GI:1293613
+KEYWORDS: .
+SOURCE: Saccharomyces cerevisiae (baker's yeast)
+ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+REFERENCE: 1  (bases 1 to 5028);2  (bases 1 to 5028);3  (bases 1 to 5028)
+AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T.
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission
+JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA
+PUBMED: 7871890;8846915
+SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta
+---
+GENE: AXL2
+FEATURE: gene
+LOCATOR: 687..3158
+LOCUS: SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION: U49845
+VERSION: U49845.1  GI:1293613
+KEYWORDS: .
+SOURCE: Saccharomyces cerevisiae (baker's yeast)
+ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+REFERENCE: 1  (bases 1 to 5028);2  (bases 1 to 5028);3  (bases 1 to 5028)
+AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T.
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission
+JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA
+PUBMED: 7871890;8846915
+SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatc
ctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg
+---
+GENE: AXL2
+NOTE: plasma membrane glycoprotein
+FUNCTION: required for axial budding pattern of S.cerevisiae
+PRODUCT: Axl2p
+PROTEIN_ID: AAA98666.1
+DB_XREF: GI:1293615
+TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML
+FEATURE: CDS
+LOCATOR: 687..3158
+LOCUS: SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION: U49845
+VERSION: U49845.1  GI:1293613
+KEYWORDS: .
+SOURCE: Saccharomyces cerevisiae (baker's yeast)
+ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+REFERENCE: 1  (bases 1 to 5028);2  (bases 1 to 5028);3  (bases 1 to 5028)
+AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T.
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission
+JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA
+PUBMED: 7871890;8846915
+SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatc
ctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg
+---
+GENE: REV7
+FEATURE: gene
+LOCATOR: complement(3300..4037)
+LOCUS: SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION: U49845
+VERSION: U49845.1  GI:1293613
+KEYWORDS: .
+SOURCE: Saccharomyces cerevisiae (baker's yeast)
+ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+REFERENCE: 1  (bases 1 to 5028);2  (bases 1 to 5028);3  (bases 1 to 5028)
+AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T.
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission
+JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA
+PUBMED: 7871890;8846915
+SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa
+---
+GENE: REV7
+PRODUCT: Rev7p
+PROTEIN_ID: AAA98667.1
+DB_XREF: GI:1293616
+TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF
+FEATURE: CDS
+LOCATOR: complement(3300..4037)
+LOCUS: SCU49845     5028 bp    DNA             PLN       21-JUN-1999
+DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds.
+ACCESSION: U49845
+VERSION: U49845.1  GI:1293613
+KEYWORDS: .
+SOURCE: Saccharomyces cerevisiae (baker's yeast)
+ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces.
+REFERENCE: 1  (bases 1 to 5028);2  (bases 1 to 5028);3  (bases 1 to 5028)
+AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T.
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission
+JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA
+PUBMED: 7871890;8846915
+SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa
+---
diff --git a/bp_test/out/read_genbank.out.2 b/bp_test/out/read_genbank.out.2
new file mode 100644 (file)
index 0000000..362f142
--- /dev/null
@@ -0,0 +1,52 @@
+ORGANISM: Saccharomyces cerevisiae
+DB_XREF: taxon:4932
+CHROMOSOME: IX
+MAP: 9
+FEATURE: source
+LOCATOR: 1..5028
+ACCESSION: U49845
+SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatc
attggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagc
attttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat
+---
+PRODUCT: TCP1-beta
+PROTEIN_ID: AAA98665.1
+DB_XREF: GI:1293614
+TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM
+FEATURE: CDS
+LOCATOR: <1..206
+ACCESSION: U49845
+SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta
+---
+GENE: AXL2
+FEATURE: gene
+LOCATOR: 687..3158
+ACCESSION: U49845
+SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatc
ctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg
+---
+GENE: AXL2
+NOTE: plasma membrane glycoprotein
+FUNCTION: required for axial budding pattern of S.cerevisiae
+PRODUCT: Axl2p
+PROTEIN_ID: AAA98666.1
+DB_XREF: GI:1293615
+TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML
+FEATURE: CDS
+LOCATOR: 687..3158
+ACCESSION: U49845
+SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatc
ctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg
+---
+GENE: REV7
+FEATURE: gene
+LOCATOR: complement(3300..4037)
+ACCESSION: U49845
+SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa
+---
+GENE: REV7
+PRODUCT: Rev7p
+PROTEIN_ID: AAA98667.1
+DB_XREF: GI:1293616
+TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF
+FEATURE: CDS
+LOCATOR: complement(3300..4037)
+ACCESSION: U49845
+SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa
+---
diff --git a/bp_test/out/read_genbank.out.3 b/bp_test/out/read_genbank.out.3
new file mode 100644 (file)
index 0000000..42b371b
--- /dev/null
@@ -0,0 +1,31 @@
+PRODUCT: TCP1-beta
+PROTEIN_ID: AAA98665.1
+DB_XREF: GI:1293614
+TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM
+FEATURE: CDS
+LOCATOR: <1..206
+ACCESSION: U49845
+SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta
+---
+GENE: AXL2
+NOTE: plasma membrane glycoprotein
+FUNCTION: required for axial budding pattern of S.cerevisiae
+PRODUCT: Axl2p
+PROTEIN_ID: AAA98666.1
+DB_XREF: GI:1293615
+TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML
+FEATURE: CDS
+LOCATOR: 687..3158
+ACCESSION: U49845
+SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatc
ctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg
+---
+GENE: REV7
+PRODUCT: Rev7p
+PROTEIN_ID: AAA98667.1
+DB_XREF: GI:1293616
+TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF
+FEATURE: CDS
+LOCATOR: complement(3300..4037)
+ACCESSION: U49845
+SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa
+---
diff --git a/bp_test/out/read_genbank.out.4 b/bp_test/out/read_genbank.out.4
new file mode 100644 (file)
index 0000000..c2341e6
--- /dev/null
@@ -0,0 +1,18 @@
+TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM
+FEATURE: CDS
+LOCATOR: <1..206
+ACCESSION: U49845
+SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta
+---
+TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML
+FEATURE: CDS
+LOCATOR: 687..3158
+ACCESSION: U49845
+SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatc
ctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg
+---
+TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF
+FEATURE: CDS
+LOCATOR: complement(3300..4037)
+ACCESSION: U49845
+SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa
+---
diff --git a/bp_test/test/test_read_genbank b/bp_test/test/test_read_genbank
new file mode 100755 (executable)
index 0000000..1ad3909
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
+run "$bp -i $in -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
+run "$bp -i $in.gz -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
+run "$bp -i $in -k AC -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -i $in -k AC -f CDS -O $tmp"
+assert_no_diff $tmp $out.3
+clean
+
+run "$bp -i $in -k AC -f CDS -q translation -O $tmp"
+assert_no_diff $tmp $out.4
+clean
diff --git a/code_ruby/Maasha/lib/genbank.rb b/code_ruby/Maasha/lib/genbank.rb
new file mode 100644 (file)
index 0000000..1bff05d
--- /dev/null
@@ -0,0 +1,372 @@
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'seq'
+require 'zlib'
+
+# Error class for all exceptions to do with Genbank.
+class GenbankError < StandardError; end
+
+class Genbank
+  include Enumerable
+
+  # Class method allowing open to be used on (zipped) files.
+  # See File.open.
+  def self.open(*args)
+    ios = self.zopen(*args)
+
+    if block_given?
+      begin
+        yield ios
+      ensure
+        ios.close
+      end
+    else
+      return ios
+    end
+  end
+
+  def initialize(io)
+    @io      = io
+    @entry   = []
+  end
+
+  # Method to close ios.
+  def close
+    @io.close
+  end
+
+  # Iterator method for parsing Genbank entries.
+  def each(hash_keys, hash_feats, hash_quals)
+    while @entry = get_entry do
+      keys = get_keys(hash_keys)
+      seq  = get_seq
+
+      features = GenbankFeatures.new(@entry, hash_feats, hash_quals)
+
+      features.each do |record|
+        keys.each_pair { |key,val| record[key] = val }
+        loc = Locator.new(record[:LOCATOR], seq)
+        record[:SEQ] = loc.subseq.seq
+
+        yield record
+      end
+    end
+  end
+
+  private
+
+  # Helper method to return an ios to a file that may be zipped in which case
+  # the ios is unzipped on the fly. See File.open.
+  def self.zopen(*args)
+    ios = File.open(*args)
+
+    begin
+      ios = Zlib::GzipReader.new(ios)
+    rescue
+      ios.rewind
+    end
+
+    self.new(ios)
+  end
+
+  # Method to get the next Genbank entry form an ios and return this.
+  def get_entry
+    block = @io.gets("//" + $/)
+    return nil if block.nil?
+
+    block.chomp!("//" + $/ )
+
+    entry = block.split $/
+
+    return nil if entry.empty?
+
+    entry
+  end
+
+  # Method to get the DNA sequence from a Genbank entry and return
+  # this as a Seq object.
+  def get_seq
+    seq = Seq.new
+    i   = @entry.size
+
+    while @entry[i] !~ /^ORIGIN/
+      i -= 1
+    end
+
+    seq.seq  = @entry[i + 1 .. @entry.size].join.delete( " 0123456789")
+    seq.type = "dna"
+    seq
+  end
+
+  # Method to get the base keys from Genbank entry and return these
+  # in a hash.
+  def get_keys(hash_keys)
+    keys = {}
+    i    = 0
+    j    = 0
+
+    while @entry[i] !~ /^FEATURES/
+      if @entry[i] =~ /^\s{0,3}([A-Z]{2})/
+        if want_key?(hash_keys, $1)
+          j = i + 1
+
+          key, val = @entry[i].lstrip.split(/\s+/, 2)
+
+          while @entry[j] !~ /^\s{0,3}[A-Z]/
+            val << @entry[j].lstrip
+            j += 1
+          end
+
+          if keys[key.to_sym]
+            keys[key.to_sym] << ";" + val
+          else
+            keys[key.to_sym] = val
+          end
+        end
+      end
+
+      j > i ? i = j : i += 1
+    end
+
+    keys
+  end
+
+  def want_key?(hash_keys, key)
+    if hash_keys
+      if hash_keys[key.to_sym]
+        return true
+      else
+        return false
+      end
+    else
+      return true
+    end
+  end
+end
+
+class GenbankFeatures
+  @@i = 0
+  @@j = 0
+
+  def initialize(entry, hash_feats, hash_quals)
+    @entry      = entry
+    @hash_feats = hash_feats
+    @hash_quals = hash_quals
+  end
+
+  def each
+    while @entry[@@i] and @entry[@@i] !~ /^ORIGIN/
+      if @entry[@@i] =~ /^\s{5}([A-Za-z_-]+)/
+        if want_feat? $1
+          record = {}
+
+          feat, loc = @entry[@@i].lstrip.split(/\s+/, 2)
+
+          @@j = @@i + 1
+
+          while @entry[@@j] and @entry[@@j] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/
+            loc << @entry[@@j].lstrip
+            @@j += 1
+          end
+
+          get_quals.each_pair { |k,v|
+            record[k.upcase.to_sym] = v
+          }
+
+          record[:FEATURE] = feat
+          record[:LOCATOR] = loc
+
+          yield record
+        end
+      end
+
+      @@j > @@i ? @@i = @@j : @@i += 1
+    end
+  end
+
+  private
+
+  def get_quals
+    quals = {}
+    k     = 0
+
+    while @entry[@@j] and @entry[@@j] !~ /^\s{5}[A-Za-z_-]|^[A-Z]/
+      if @entry[@@j] =~ /^\s{21}\/([^=]+)="([^"]+)/
+        qual = $1
+        val  = $2
+
+        if want_qual? qual
+          k = @@j + 1
+
+          while @entry[k] and @entry[k] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/
+            val << @entry[k].lstrip.chomp('"')
+            k += 1
+          end
+
+          if quals[qual]
+            quals[qual] << ";" + val
+          else
+            quals[qual] = val
+          end
+        end
+      end
+
+      k > @@j ? @@j = k : @@j += 1
+    end
+
+    quals
+  end
+
+  def want_feat?(feat)
+    if @hash_feats
+      if @hash_feats[feat.upcase.to_sym]
+        return true
+      else
+        return false
+      end
+    else
+      return true
+    end
+  end
+
+  def want_qual?(qual)
+    if @hash_quals
+      if @hash_quals[qual.upcase.to_sym]
+        return true
+      else
+        return false
+      end
+    else
+      return true
+    end
+  end
+end
+
+
+# Error class for all exceptions to do with Genbank/EMBL/DDBJ feature table locators.
+class LocatorError < StandardError; end
+
+class Locator
+  attr_accessor :locator, :seq, :subseq
+
+  def initialize(locator, seq)
+    @locator = locator
+    @seq     = seq
+    @subseq  = Seq.new(nil, "", "dna")
+  end
+
+  def subseq
+    parse_locator
+  end
+
+  def to_s
+    @locator
+  end
+
+  private
+
+  # Method that uses recursion to parse a locator string from a feature
+  # table and fetches the appropriate subsequence. the operators
+  # join(), complement(), and order() are handled.
+  # the locator string is broken into a comma separated lists, and
+  # modified if the params donnot balance. otherwise the comma separated
+  # list of ranges are stripped from operators, and the subsequence are
+  # fetched and handled according to the operators.
+  # SNP locators are also dealt with (single positions).
+  def parse_locator(join = nil, comp = nil, order = nil)
+    intervals = @locator.split(",")
+
+    unless balance_params?(intervals.first)   # locator includes a join/comp/order of several ranges
+      case @locator
+      when /^join\((.*)\)$/
+        @locator = $1
+        join     = true
+      when /^complement\((.*)\)$/
+        @locator = $1
+        comp     = true
+      when /^order\((.*)\)$/
+        @locator = $1
+        order    = true
+      end
+
+      parse_locator(join, comp, order)
+    else
+      intervals.each do |interval|
+        case interval
+        when /^join\((.*)\)$/
+          @locator = $1
+          join     = true
+          parse_locator(join, comp, order)
+        when /^complement\((.*)\)$/
+          @locator = $1
+          comp     = true
+          parse_locator(join, comp, order)
+        when /^order\((.*)\)$/
+          @locator = $1
+          order    = true
+          parse_locator(join, comp, order)
+        when /^[<>]?(\d+)[^\d]+(\d+)$/
+          int_beg = $1.to_i - 1
+          int_end = $2.to_i - 1
+
+          newseq = Seq.new(nil, @seq.seq[int_beg...int_end], "dna")
+          newseq.revcomp if comp 
+
+          @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
+        when /^(\d+)$/
+          pos = $1.to_i - 1
+
+          newseq = Seq.new(nil, @seq.seq[pos], "dna")
+          newseq.revcomp if comp 
+
+          @subseq.seq << (order ? " " + newseq.seq : newseq.seq)
+        else
+          $stderr.puts "WARNING: Could not match locator -> #{locator}";
+          @subseq.seq << ""
+        end
+      end
+    end
+
+    return @subseq
+  end
+
+  def balance_params?(locator)
+    parens = 0
+
+    locator.each_char do |char|
+      case char
+      when '(' then parens += 1
+      when ')' then parens -= 1
+      end
+    end
+
+    if parens == 0
+      return true
+    else
+      return false
+    end
+  end
+end
+
+
+__END__
diff --git a/code_ruby/Maasha/test/test_genbank.rb b/code_ruby/Maasha/test/test_genbank.rb
new file mode 100755 (executable)
index 0000000..973ee89
--- /dev/null
@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+
+require 'genbank'
+require 'seq'
+require 'test/unit'
+require 'pp'
+
+class TestGenbank < Test::Unit::TestCase 
+  def setup
+    seq  = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", "dna")
+    @loc = Locator.new("", seq)
+  end
+
+  def test_Locator_with_single_position_returns_correctly
+    @loc.locator = "10"
+    assert_equal("a", @loc.subseq.seq)
+  end
+
+  def test_Locator_with_single_interval_returns_correctly
+    @loc.locator = "5..10"
+    assert_equal("gatca", @loc.subseq.seq)
+  end
+
+  def test_Locator_with_multiple_intervals_return_correctly
+    @loc.locator = "5..10,15..20"
+    assert_equal("gatcataaca", @loc.subseq.seq)
+  end
+
+  def test_Locator_with_join_multiple_intervals_return_correctly
+    @loc.locator = "join(5..10,15..20)"
+    assert_equal("gatcataaca", @loc.subseq.seq)
+  end
+
+  def test_Locator_with_complement_and_single_interval_return_correctly
+    @loc.locator = "complement(5..10)"
+    assert_equal("tgatc", @loc.subseq.seq)
+  end
+end
+
+
+__END__