#!/usr/bin/env ruby

# Provenance (header of the patch this script was extracted from):
#   From: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
#   Date: Wed, 8 Dec 2010 14:40:29 +0000 (+0000)
#   Subject: added read_genbank
#   X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=383bc2adebafb06fd81d27dd3bb5ffef1887c1b2;p=biopieces.git
#   git-svn-id: http://biopieces.googlecode.com/svn/trunk@1170 74ccb610-7750-0410-82ae-013aeee3265d
#   diff --git a/bp_bin/read_genbank b/bp_bin/read_genbank
#   new file mode 100755
#   index 0000000..6e471bb

# Copyright (C) 2007-2010 Martin A. Hansen.

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This program is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Read Genbank entries from one or more files.

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

require 'biopieces'
require 'genbank'

# Command line option specifications handed to Biopieces#parse.
casts = []
casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
casts << {:long=>'keys', :short=>'k', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'features', :short=>'f', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'qualifiers', :short=>'q', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}

# Turns a list of names into a lookup hash keyed by the upcased, symbolized
# name (e.g. ['cds'] => {:CDS => true}). Returns nil when no list was given,
# so an absent option is still passed on to Genbank#each as nil.
# A lambda (rather than a top-level def) keeps the helper local to this script.
to_lookup = lambda do |list|
  list && list.inject({}) { |lookup, name| lookup[name.upcase.to_sym] = true; lookup }
end

bp = Biopieces.new

options = bp.parse(ARGV, casts)

hash_keys  = to_lookup.call(options[:keys])
hash_feats = to_lookup.call(options[:features])
hash_quals = to_lookup.call(options[:qualifiers])

# Re-emit every record yielded by Biopieces#each_record before any Genbank
# entries are written (presumably the records of the incoming stream --
# confirm against the biopieces library).
bp.each_record do |record|
  bp.puts record
end

num  = 0      # number of Genbank entries emitted so far, across all files
last = false  # set once the --num limit has been reached

# Test the option by value, like the list options above, instead of by
# has_key?: a key that is present but nil must not reach #each.
if options[:data_in]
  options[:data_in].each do |file|
    Genbank.open(file, 'r') do |gb|
      gb.each(hash_keys, hash_feats, hash_quals) do |entry|
        bp.puts entry

        num += 1

        # options[:num] is nil when no limit was requested, and nil never
        # equals the counter, so no separate presence check is needed.
        if options[:num] == num
          last = true
          break
        end
      end
    end

    # The break above only leaves gb.each; the Genbank.open block is then
    # allowed to finish normally before the file loop is abandoned here.
    break if last
  end
end


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Everything after the __END__ marker is ignored by the Ruby parser. It is the
# remainder of the patch (test fixtures and expected output), kept verbatim.

__END__
 diff --git a/bp_test/in/read_genbank.in b/bp_test/in/read_genbank.in new file mode 100644 index 0000000..87f75d4 --- /dev/null +++ b/bp_test/in/read_genbank.in @@ -0,0 +1,165 @@ +LOCUS SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p + (AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION U49845 +VERSION U49845.1 GI:1293613 +KEYWORDS . +SOURCE Saccharomyces cerevisiae (baker's yeast) + ORGANISM Saccharomyces cerevisiae + Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes; + Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE 1 (bases 1 to 5028) + AUTHORS Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W. + TITLE Cloning and sequence of REV7, a gene whose function is required for + DNA damage-induced mutagenesis in Saccharomyces cerevisiae + JOURNAL Yeast 10 (11), 1503-1509 (1994) + PUBMED 7871890 +REFERENCE 2 (bases 1 to 5028) + AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M. + TITLE Selection of axial growth sites in yeast requires Axl2p, a novel + plasma membrane glycoprotein + JOURNAL Genes Dev. 10 (7), 777-793 (1996) + PUBMED 8846915 +REFERENCE 3 (bases 1 to 5028) + AUTHORS Roemer,T. 
+ TITLE Direct Submission + JOURNAL Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New + Haven, CT, USA +FEATURES Location/Qualifiers + source 1..5028 + /organism="Saccharomyces cerevisiae" + /db_xref="taxon:4932" + /chromosome="IX" + /map="9" + CDS <1..206 + /codon_start=3 + /product="TCP1-beta" + /protein_id="AAA98665.1" + /db_xref="GI:1293614" + /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA + AEVLLRVDNIIRARPRTANRQHM" + gene 687..3158 + /gene="AXL2" + CDS 687..3158 + /gene="AXL2" + /note="plasma membrane glycoprotein" + /codon_start=1 + /function="required for axial budding pattern of S. + cerevisiae" + /product="Axl2p" + /protein_id="AAA98666.1" + /db_xref="GI:1293615" + /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF + TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN + VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE + VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE + TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV + YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG + DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ + DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA + NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA + CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN + NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ + SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS + YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK + HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL + VDFSNKSNVNVGQVKDIHGRIPEML" + gene complement(3300..4037) + /gene="REV7" + CDS complement(3300..4037) + /gene="REV7" + /codon_start=1 + /product="Rev7p" + /protein_id="AAA98667.1" + /db_xref="GI:1293616" + /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ + FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD + KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR + 
RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK + LISGDDKILNGVYSQYEEGESIFGSLF" +ORIGIN + 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg + 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct + 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa + 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg + 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa + 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa + 361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat + 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga + 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc + 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga + 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta + 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag + 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa + 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata + 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga + 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac + 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg + 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc + 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa + 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca + 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac + 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa + 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag + 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct + 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac + 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa + 1561 acttattgga 
tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc + 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata + 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca + 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc + 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc + 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca + 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc + 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg + 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt + 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc + 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg + 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca + 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata + 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg + 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga + 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt + 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat + 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt + 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc + 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag + 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta + 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa + 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact + 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt + 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa + 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag + 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct + 3181 taattttatt ttcctgtttt attttttatt 
agtggtttac agatacccta tattttattt + 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact + 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa + 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg + 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt + 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc + 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca + 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc + 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc + 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat + 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa + 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga + 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat + 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc + 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc + 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa + 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg + 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc + 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt + 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg + 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg + 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt + 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt + 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat + 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc + 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct + 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta + 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg 
actctctaac + 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct + 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct + 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc +// diff --git a/bp_test/in/read_genbank.in.gz b/bp_test/in/read_genbank.in.gz new file mode 100644 index 0000000..b09274e Binary files /dev/null and b/bp_test/in/read_genbank.in.gz differ diff --git a/bp_test/out/read_genbank.out.1 b/bp_test/out/read_genbank.out.1 new file mode 100644 index 0000000..bc821ee --- /dev/null +++ b/bp_test/out/read_genbank.out.1 @@ -0,0 +1,117 @@ +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +DB_XREF: taxon:4932 +CHROMOSOME: IX +MAP: 9 +FEATURE: source +LOCATOR: 1..5028 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 
10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaaga
ccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttct
atatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat +--- +PRODUCT: TCP1-beta +PROTEIN_ID: AAA98665.1 +DB_XREF: GI:1293614 +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. 
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +GENE: AXL2 +FEATURE: gene +LOCATOR: 687..3158 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 
10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatc
agttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: AXL2 +NOTE: plasma membrane glycoprotein +FUNCTION: required for axial budding pattern of S.cerevisiae +PRODUCT: Axl2p +PROTEIN_ID: AAA98666.1 +DB_XREF: GI:1293615 +TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . 
+SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagc
caagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: REV7 +FEATURE: gene +LOCATOR: complement(3300..4037) +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. 
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- +GENE: REV7 +PRODUCT: Rev7p +PROTEIN_ID: AAA98667.1 +DB_XREF: GI:1293616 +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. 
+REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/out/read_genbank.out.2 b/bp_test/out/read_genbank.out.2 new file mode 100644 index 0000000..362f142 --- /dev/null +++ b/bp_test/out/read_genbank.out.2 @@ -0,0 +1,52 @@ +ORGANISM: Saccharomyces cerevisiae +DB_XREF: taxon:4932 +CHROMOSOME: IX +MAP: 9 +FEATURE: source +LOCATOR: 1..5028 +ACCESSION: U49845 +SEQ: 
gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggc
atggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcatttta
agtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat +--- +PRODUCT: TCP1-beta +PROTEIN_ID: AAA98665.1 +DB_XREF: GI:1293614 +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +ACCESSION: U49845 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +GENE: AXL2 +FEATURE: gene +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: 
atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcg
atatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: AXL2 +NOTE: plasma membrane glycoprotein +FUNCTION: required for axial budding pattern of S.cerevisiae +PRODUCT: Axl2p +PROTEIN_ID: AAA98666.1 +DB_XREF: GI:1293615 +TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: 
atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcg
atatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: REV7 +FEATURE: gene +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- +GENE: REV7 +PRODUCT: Rev7p +PROTEIN_ID: AAA98667.1 +DB_XREF: GI:1293616 +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: 
tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/out/read_genbank.out.3 b/bp_test/out/read_genbank.out.3 new file mode 100644 index 0000000..42b371b --- /dev/null +++ b/bp_test/out/read_genbank.out.3 @@ -0,0 +1,31 @@ +PRODUCT: TCP1-beta +PROTEIN_ID: AAA98665.1 +DB_XREF: GI:1293614 +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +ACCESSION: U49845 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +GENE: AXL2 +NOTE: plasma membrane glycoprotein +FUNCTION: required for axial budding pattern of S.cerevisiae +PRODUCT: Axl2p +PROTEIN_ID: AAA98666.1 +DB_XREF: GI:1293615 +TRANSLATION: 
MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgcctt
ctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: REV7 +PRODUCT: Rev7p +PROTEIN_ID: AAA98667.1 +DB_XREF: GI:1293616 +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: 
tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/out/read_genbank.out.4 b/bp_test/out/read_genbank.out.4 new file mode 100644 index 0000000..c2341e6 --- /dev/null +++ b/bp_test/out/read_genbank.out.4 @@ -0,0 +1,18 @@ +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +ACCESSION: U49845 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +TRANSLATION: 
MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgcctt
ctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: 
tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/test/test_read_genbank b/bp_test/test/test_read_genbank new file mode 100755 index 0000000..1ad3909 --- /dev/null +++ b/bp_test/test/test_read_genbank @@ -0,0 +1,23 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -i $in -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -i $in.gz -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -i $in -k AC -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -i $in -k AC -f CDS -O $tmp" +assert_no_diff $tmp $out.3 +clean + +run "$bp -i $in -k AC -f CDS -q translation -O $tmp" +assert_no_diff $tmp $out.4 +clean diff --git a/code_ruby/Maasha/lib/genbank.rb b/code_ruby/Maasha/lib/genbank.rb new file mode 100644 index 0000000..1bff05d --- /dev/null +++ b/code_ruby/Maasha/lib/genbank.rb @@ -0,0 +1,372 @@ +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This software is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'seq' +require 'zlib' + +# Error class for all exceptions to do with Genbank. +class GenbankError < StandardError; end + +class Genbank + include Enumerable + + # Class method allowing open to be used on (zipped) files. + # See File.open. + def self.open(*args) + ios = self.zopen(*args) + + if block_given? + begin + yield ios + ensure + ios.close + end + else + return ios + end + end + + def initialize(io) + @io = io + @entry = [] + end + + # Method to close ios. + def close + @io.close + end + + # Iterator method for parsing Genbank entries. + def each(hash_keys, hash_feats, hash_quals) + while @entry = get_entry do + keys = get_keys(hash_keys) + seq = get_seq + + features = GenbankFeatures.new(@entry, hash_feats, hash_quals) + + features.each do |record| + keys.each_pair { |key,val| record[key] = val } + loc = Locator.new(record[:LOCATOR], seq) + record[:SEQ] = loc.subseq.seq + + yield record + end + end + end + + private + + # Helper method to return an ios to a file that may be zipped in which case + # the ios is unzipped on the fly. See File.open. + def self.zopen(*args) + ios = File.open(*args) + + begin + ios = Zlib::GzipReader.new(ios) + rescue + ios.rewind + end + + self.new(ios) + end + + # Method to get the next Genbank entry form an ios and return this. + def get_entry + block = @io.gets("//" + $/) + return nil if block.nil? 
+ + block.chomp!("//" + $/ ) + + entry = block.split $/ + + return nil if entry.empty? + + entry + end + + # Method to get the DNA sequence from a Genbank entry and return + # this as a Seq object. + def get_seq + seq = Seq.new + i = @entry.size + + while @entry[i] !~ /^ORIGIN/ + i -= 1 + end + + seq.seq = @entry[i + 1 .. @entry.size].join.delete( " 0123456789") + seq.type = "dna" + seq + end + + # Method to get the base keys from Genbank entry and return these + # in a hash. + def get_keys(hash_keys) + keys = {} + i = 0 + j = 0 + + while @entry[i] !~ /^FEATURES/ + if @entry[i] =~ /^\s{0,3}([A-Z]{2})/ + if want_key?(hash_keys, $1) + j = i + 1 + + key, val = @entry[i].lstrip.split(/\s+/, 2) + + while @entry[j] !~ /^\s{0,3}[A-Z]/ + val << @entry[j].lstrip + j += 1 + end + + if keys[key.to_sym] + keys[key.to_sym] << ";" + val + else + keys[key.to_sym] = val + end + end + end + + j > i ? i = j : i += 1 + end + + keys + end + + def want_key?(hash_keys, key) + if hash_keys + if hash_keys[key.to_sym] + return true + else + return false + end + else + return true + end + end +end + +class GenbankFeatures + @@i = 0 + @@j = 0 + + def initialize(entry, hash_feats, hash_quals) + @entry = entry + @hash_feats = hash_feats + @hash_quals = hash_quals + end + + def each + while @entry[@@i] and @entry[@@i] !~ /^ORIGIN/ + if @entry[@@i] =~ /^\s{5}([A-Za-z_-]+)/ + if want_feat? $1 + record = {} + + feat, loc = @entry[@@i].lstrip.split(/\s+/, 2) + + @@j = @@i + 1 + + while @entry[@@j] and @entry[@@j] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/ + loc << @entry[@@j].lstrip + @@j += 1 + end + + get_quals.each_pair { |k,v| + record[k.upcase.to_sym] = v + } + + record[:FEATURE] = feat + record[:LOCATOR] = loc + + yield record + end + end + + @@j > @@i ? @@i = @@j : @@i += 1 + end + end + + private + + def get_quals + quals = {} + k = 0 + + while @entry[@@j] and @entry[@@j] !~ /^\s{5}[A-Za-z_-]|^[A-Z]/ + if @entry[@@j] =~ /^\s{21}\/([^=]+)="([^"]+)/ + qual = $1 + val = $2 + + if want_qual? 
qual + k = @@j + 1 + + while @entry[k] and @entry[k] !~ /^(\s{21}\/|\s{5}[A-Za-z_-]|[A-Z])/ + val << @entry[k].lstrip.chomp('"') + k += 1 + end + + if quals[qual] + quals[qual] << ";" + val + else + quals[qual] = val + end + end + end + + k > @@j ? @@j = k : @@j += 1 + end + + quals + end + + def want_feat?(feat) + if @hash_feats + if @hash_feats[feat.upcase.to_sym] + return true + else + return false + end + else + return true + end + end + + def want_qual?(qual) + if @hash_quals + if @hash_quals[qual.upcase.to_sym] + return true + else + return false + end + else + return true + end + end +end + + +# Error class for all exceptions to do with Genbank/EMBL/DDBJ feature table locators. +class LocatorError < StandardError; end + +class Locator + attr_accessor :locator, :seq, :subseq + + def initialize(locator, seq) + @locator = locator + @seq = seq + @subseq = Seq.new(nil, "", "dna") + end + + def subseq + parse_locator + end + + def to_s + @locator + end + + private + + # Method that uses recursion to parse a locator string from a feature + # table and fetches the appropriate subsequence. the operators + # join(), complement(), and order() are handled. + # the locator string is broken into a comma separated lists, and + # modified if the params donnot balance. otherwise the comma separated + # list of ranges are stripped from operators, and the subsequence are + # fetched and handled according to the operators. + # SNP locators are also dealt with (single positions). 
+ def parse_locator(join = nil, comp = nil, order = nil) + intervals = @locator.split(",") + + unless balance_params?(intervals.first) # locator includes a join/comp/order of several ranges + case @locator + when /^join\((.*)\)$/ + @locator = $1 + join = true + when /^complement\((.*)\)$/ + @locator = $1 + comp = true + when /^order\((.*)\)$/ + @locator = $1 + order = true + end + + parse_locator(join, comp, order) + else + intervals.each do |interval| + case interval + when /^join\((.*)\)$/ + @locator = $1 + join = true + parse_locator(join, comp, order) + when /^complement\((.*)\)$/ + @locator = $1 + comp = true + parse_locator(join, comp, order) + when /^order\((.*)\)$/ + @locator = $1 + order = true + parse_locator(join, comp, order) + when /^[<>]?(\d+)[^\d]+(\d+)$/ + int_beg = $1.to_i - 1 + int_end = $2.to_i - 1 + + newseq = Seq.new(nil, @seq.seq[int_beg...int_end], "dna") + newseq.revcomp if comp + + @subseq.seq << (order ? " " + newseq.seq : newseq.seq) + when /^(\d+)$/ + pos = $1.to_i - 1 + + newseq = Seq.new(nil, @seq.seq[pos], "dna") + newseq.revcomp if comp + + @subseq.seq << (order ? 
" " + newseq.seq : newseq.seq) + else + $stderr.puts "WARNING: Could not match locator -> #{locator}"; + @subseq.seq << "" + end + end + end + + return @subseq + end + + def balance_params?(locator) + parens = 0 + + locator.each_char do |char| + case char + when '(' then parens += 1 + when ')' then parens -= 1 + end + end + + if parens == 0 + return true + else + return false + end + end +end + + +__END__ diff --git a/code_ruby/Maasha/test/test_genbank.rb b/code_ruby/Maasha/test/test_genbank.rb new file mode 100755 index 0000000..973ee89 --- /dev/null +++ b/code_ruby/Maasha/test/test_genbank.rb @@ -0,0 +1,41 @@ +#!/usr/bin/env ruby + +require 'genbank' +require 'seq' +require 'test/unit' +require 'pp' + +class TestGenbank < Test::Unit::TestCase + def setup + seq = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", "dna") + @loc = Locator.new("", seq) + end + + def test_Locator_with_single_position_returns_correctly + @loc.locator = "10" + assert_equal("a", @loc.subseq.seq) + end + + def test_Locator_with_single_interval_returns_correctly + @loc.locator = "5..10" + assert_equal("gatca", @loc.subseq.seq) + end + + def test_Locator_with_multiple_intervals_return_correctly + @loc.locator = "5..10,15..20" + assert_equal("gatcataaca", @loc.subseq.seq) + end + + def test_Locator_with_join_multiple_intervals_return_correctly + @loc.locator = "join(5..10,15..20)" + assert_equal("gatcataaca", @loc.subseq.seq) + end + + def test_Locator_with_complement_and_single_interval_return_correctly + @loc.locator = "complement(5..10)" + assert_equal("tgatc", @loc.subseq.seq) + end +end + + +__END__