From 383bc2adebafb06fd81d27dd3bb5ffef1887c1b2 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 8 Dec 2010 14:40:29 +0000 Subject: [PATCH] added read_genbank git-svn-id: http://biopieces.googlecode.com/svn/trunk@1170 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/read_genbank | 79 ++++++ bp_test/in/read_genbank.in | 165 ++++++++++++ bp_test/in/read_genbank.in.gz | Bin 0 -> 4379 bytes bp_test/out/read_genbank.out.1 | 117 ++++++++ bp_test/out/read_genbank.out.2 | 52 ++++ bp_test/out/read_genbank.out.3 | 31 +++ bp_test/out/read_genbank.out.4 | 18 ++ bp_test/test/test_read_genbank | 23 ++ code_ruby/Maasha/lib/genbank.rb | 372 ++++++++++++++++++++++++++ code_ruby/Maasha/test/test_genbank.rb | 41 +++ 10 files changed, 898 insertions(+) create mode 100755 bp_bin/read_genbank create mode 100644 bp_test/in/read_genbank.in create mode 100644 bp_test/in/read_genbank.in.gz create mode 100644 bp_test/out/read_genbank.out.1 create mode 100644 bp_test/out/read_genbank.out.2 create mode 100644 bp_test/out/read_genbank.out.3 create mode 100644 bp_test/out/read_genbank.out.4 create mode 100755 bp_test/test/test_read_genbank create mode 100644 code_ruby/Maasha/lib/genbank.rb create mode 100755 code_ruby/Maasha/test/test_genbank.rb diff --git a/bp_bin/read_genbank b/bp_bin/read_genbank new file mode 100755 index 0000000..6e471bb --- /dev/null +++ b/bp_bin/read_genbank @@ -0,0 +1,79 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Read Genbank entries from one or more files. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'biopieces' +require 'genbank' + +casts = [] +casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'keys', :short=>'k', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'features', :short=>'f', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'qualifiers', :short=>'q', :type=>'list', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) + +hash_keys = options[:keys].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:keys] +hash_feats = options[:features].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:features] +hash_quals = options[:qualifiers].inject(Hash.new) { |h,k| h[k.upcase.to_sym] = true; h } if options[:qualifiers] + +bp.each_record do |record| + bp.puts record +end + +num = 0 +last = false + +if options.has_key? 
:data_in + options[:data_in].each do |file| + Genbank.open(file, mode='r') do |gb| + gb.each(hash_keys, hash_feats, hash_quals) do |entry| + bp.puts entry + + num += 1 + + if options.has_key? :num and options[:num] == num + last = true + break + end + end + end + + break if last + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_test/in/read_genbank.in b/bp_test/in/read_genbank.in new file mode 100644 index 0000000..87f75d4 --- /dev/null +++ b/bp_test/in/read_genbank.in @@ -0,0 +1,165 @@ +LOCUS SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p + (AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION U49845 +VERSION U49845.1 GI:1293613 +KEYWORDS . +SOURCE Saccharomyces cerevisiae (baker's yeast) + ORGANISM Saccharomyces cerevisiae + Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes; + Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE 1 (bases 1 to 5028) + AUTHORS Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W. + TITLE Cloning and sequence of REV7, a gene whose function is required for + DNA damage-induced mutagenesis in Saccharomyces cerevisiae + JOURNAL Yeast 10 (11), 1503-1509 (1994) + PUBMED 7871890 +REFERENCE 2 (bases 1 to 5028) + AUTHORS Roemer,T., Madden,K., Chang,J. and Snyder,M. + TITLE Selection of axial growth sites in yeast requires Axl2p, a novel + plasma membrane glycoprotein + JOURNAL Genes Dev. 10 (7), 777-793 (1996) + PUBMED 8846915 +REFERENCE 3 (bases 1 to 5028) + AUTHORS Roemer,T. 
+ TITLE Direct Submission + JOURNAL Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New + Haven, CT, USA +FEATURES Location/Qualifiers + source 1..5028 + /organism="Saccharomyces cerevisiae" + /db_xref="taxon:4932" + /chromosome="IX" + /map="9" + CDS <1..206 + /codon_start=3 + /product="TCP1-beta" + /protein_id="AAA98665.1" + /db_xref="GI:1293614" + /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA + AEVLLRVDNIIRARPRTANRQHM" + gene 687..3158 + /gene="AXL2" + CDS 687..3158 + /gene="AXL2" + /note="plasma membrane glycoprotein" + /codon_start=1 + /function="required for axial budding pattern of S. + cerevisiae" + /product="Axl2p" + /protein_id="AAA98666.1" + /db_xref="GI:1293615" + /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF + TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN + VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE + VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE + TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV + YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG + DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ + DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA + NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA + CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN + NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ + SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS + YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK + HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL + VDFSNKSNVNVGQVKDIHGRIPEML" + gene complement(3300..4037) + /gene="REV7" + CDS complement(3300..4037) + /gene="REV7" + /codon_start=1 + /product="Rev7p" + /protein_id="AAA98667.1" + /db_xref="GI:1293616" + /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ + FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD + KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR + 
RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK + LISGDDKILNGVYSQYEEGESIFGSLF" +ORIGIN + 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg + 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct + 121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa + 181 gaaccgccaa tagacaacat atgtaacata tttaggatat acctcgaaaa taataaaccg + 241 ccacactgtc attattataa ttagaaacag aacgcaaaaa ttatccacta tataattcaa + 301 agacgcgaaa aaaaaagaac aacgcgtcat agaacttttg gcaattcgcg tcacaaataa + 361 attttggcaa cttatgtttc ctcttcgagc agtactcgag ccctgtctca agaatgtaat + 421 aatacccatc gtaggtatgg ttaaagatag catctccaca acctcaaagc tccttgccga + 481 gagtcgccct cctttgtcga gtaattttca cttttcatat gagaacttat tttcttattc + 541 tttactctca catcctgtag tgattgacac tgcaacagcc accatcacta gaagaacaga + 601 acaattactt aatagaaaaa ttatatcttc ctcgaaacga tttcctgctt ccaacatcta + 661 cgtatatcaa gaagcattca cttaccatga cacagcttca gatttcatta ttgctgacag + 721 ctactatatc actactccat ctagtagtgg ccacgcccta tgaggcatat cctatcggaa + 781 aacaataccc cccagtggca agagtcaatg aatcgtttac atttcaaatt tccaatgata + 841 cctataaatc gtctgtagac aagacagctc aaataacata caattgcttc gacttaccga + 901 gctggctttc gtttgactct agttctagaa cgttctcagg tgaaccttct tctgacttac + 961 tatctgatgc gaacaccacg ttgtatttca atgtaatact cgagggtacg gactctgccg + 1021 acagcacgtc tttgaacaat acataccaat ttgttgttac aaaccgtcca tccatctcgc + 1081 tatcgtcaga tttcaatcta ttggcgttgt taaaaaacta tggttatact aacggcaaaa + 1141 acgctctgaa actagatcct aatgaagtct tcaacgtgac ttttgaccgt tcaatgttca + 1201 ctaacgaaga atccattgtg tcgtattacg gacgttctca gttgtataat gcgccgttac + 1261 ccaattggct gttcttcgat tctggcgagt tgaagtttac tgggacggca ccggtgataa + 1321 actcggcgat tgctccagaa acaagctaca gttttgtcat catcgctaca gacattgaag + 1381 gattttctgc cgttgaggta gaattcgaat tagtcatcgg ggctcaccag ttaactacct + 1441 ctattcaaaa tagtttgata atcaacgtta ctgacacagg taacgtttca tatgacttac + 1501 ctctaaacta tgtttatctc gatgacgatc ctatttcttc tgataaattg ggttctataa + 1561 acttattgga 
tgctccagac tgggtggcat tagataatgc taccatttcc gggtctgtcc + 1621 cagatgaatt actcggtaag aactccaatc ctgccaattt ttctgtgtcc atttatgata + 1681 cttatggtga tgtgatttat ttcaacttcg aagttgtctc cacaacggat ttgtttgcca + 1741 ttagttctct tcccaatatt aacgctacaa ggggtgaatg gttctcctac tattttttgc + 1801 cttctcagtt tacagactac gtgaatacaa acgtttcatt agagtttact aattcaagcc + 1861 aagaccatga ctgggtgaaa ttccaatcat ctaatttaac attagctgga gaagtgccca + 1921 agaatttcga caagctttca ttaggtttga aagcgaacca aggttcacaa tctcaagagc + 1981 tatattttaa catcattggc atggattcaa agataactca ctcaaaccac agtgcgaatg + 2041 caacgtccac aagaagttct caccactcca cctcaacaag ttcttacaca tcttctactt + 2101 acactgcaaa aatttcttct acctccgctg ctgctacttc ttctgctcca gcagcgctgc + 2161 cagcagccaa taaaacttca tctcacaata aaaaagcagt agcaattgcg tgcggtgttg + 2221 ctatcccatt aggcgttatc ctagtagctc tcatttgctt cctaatattc tggagacgca + 2281 gaagggaaaa tccagacgat gaaaacttac cgcatgctat tagtggacct gatttgaata + 2341 atcctgcaaa taaaccaaat caagaaaacg ctacaccttt gaacaacccc tttgatgatg + 2401 atgcttcctc gtacgatgat acttcaatag caagaagatt ggctgctttg aacactttga + 2461 aattggataa ccactctgcc actgaatctg atatttccag cgtggatgaa aagagagatt + 2521 ctctatcagg tatgaataca tacaatgatc agttccaatc ccaaagtaaa gaagaattat + 2581 tagcaaaacc cccagtacag cctccagaga gcccgttctt tgacccacag aataggtctt + 2641 cttctgtgta tatggatagt gaaccagcag taaataaatc ctggcgatat actggcaacc + 2701 tgtcaccagt ctctgatatt gtcagagaca gttacggatc acaaaaaact gttgatacag + 2761 aaaaactttt cgatttagaa gcaccagaga aggaaaaacg tacgtcaagg gatgtcacta + 2821 tgtcttcact ggacccttgg aacagcaata ttagcccttc tcccgtaaga aaatcagtaa + 2881 caccatcacc atataacgta acgaagcatc gtaaccgcca cttacaaaat attcaagact + 2941 ctcaaagcgg taaaaacgga atcactccca caacaatgtc aacttcatct tctgacgatt + 3001 ttgttccggt taaagatggt gaaaattttt gctgggtcca tagcatggaa ccagacagaa + 3061 gaccaagtaa gaaaaggtta gtagattttt caaataagag taatgtcaat gttggtcaag + 3121 ttaaggacat tcacggacgc atcccagaaa tgctgtgatt atacgcaacg atattttgct + 3181 taattttatt ttcctgtttt attttttatt 
agtggtttac agatacccta tattttattt + 3241 agtttttata cttagagaca tttaatttta attccattct tcaaatttca tttttgcact + 3301 taaaacaaag atccaaaaat gctctcgccc tcttcatatt gagaatacac tccattcaaa + 3361 attttgtcgt caccgctgat taatttttca ctaaactgat gaataatcaa aggccccacg + 3421 tcagaaccga ctaaagaagt gagttttatt ttaggaggtt gaaaaccatt attgtctggt + 3481 aaattttcat cttcttgaca tttaacccag tttgaatccc tttcaatttc tgctttttcc + 3541 tccaaactat cgaccctcct gtttctgtcc aacttatgtc ctagttccaa ttcgatcgca + 3601 ttaataactg cttcaaatgt tattgtgtca tcgttgactt taggtaattt ctccaaatgc + 3661 ataatcaaac tatttaagga agatcggaat tcgtcgaaca cttcagtttc cgtaatgatc + 3721 tgatcgtctt tatccacatg ttgtaattca ctaaaatcta aaacgtattt ttcaatgcat + 3781 aaatcgttct ttttattaat aatgcagatg gaaaatctgt aaacgtgcgt taatttagaa + 3841 agaacatcca gtataagttc ttctatatag tcaattaaag caggatgcct attaatggga + 3901 acgaactgcg gcaagttgaa tgactggtaa gtagtgtagt cgaatgactg aggtgggtat + 3961 acatttctat aaaataaaat caaattaatg tagcatttta agtataccct cagccacttc + 4021 tctacccatc tattcataaa gctgacgcaa cgattactat tttttttttc ttcttggatc + 4081 tcagtcgtcg caaaaacgta taccttcttt ttccgacctt ttttttagct ttctggaaaa + 4141 gtttatatta gttaaacagg gtctagtctt agtgtgaaag ctagtggttt cgattgactg + 4201 atattaagaa agtggaaatt aaattagtag tgtagacgta tatgcatatg tatttctcgc + 4261 ctgtttatgt ttctacgtac ttttgattta tagcaagggg aaaagaaata catactattt + 4321 tttggtaaag gtgaaagcat aatgtaaaag ctagaataaa atggacgaaa taaagagagg + 4381 cttagttcat cttttttcca aaaagcaccc aatgataata actaaaatga aaaggatttg + 4441 ccatctgtca gcaacatcag ttgtgtgagc aataataaaa tcatcacctc cgttgccttt + 4501 agcgcgtttg tcgtttgtat cttccgtaat tttagtctta tcaatgggaa tcataaattt + 4561 tccaatgaat tagcaatttc gtccaattct ttttgagctt cttcatattt gctttggaat + 4621 tcttcgcact tcttttccca ttcatctctt tcttcttcca aagcaacgat ccttctaccc + 4681 atttgctcag agttcaaatc ggcctctttc agtttatcca ttgcttcctt cagtttggct + 4741 tcactgtctt ctagctgttg ttctagatcc tggtttttct tggtgtagtt ctcattatta + 4801 gatctcaagt tattggagtc ttcagccaat tgctttgtat cagacaattg 
actctctaac + 4861 ttctccactt cactgtcgag ttgctcgttt ttagcggaca aagatttaat ctcgttttct + 4921 ttttcagtgt tagattgctc taattctttg agctgttctc tcagctcctc atatttttct + 4981 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc +// diff --git a/bp_test/in/read_genbank.in.gz b/bp_test/in/read_genbank.in.gz new file mode 100644 index 0000000000000000000000000000000000000000..b09274e4e678d7c30fad84a33203756cd6f4ac5f GIT binary patch literal 4379 zcmV+$5#;V4iwFoChW|_e19D|yWM5}xZen3>Yc6SS0Hs)6bK6L+eb2Ax^2<%C9FO&3 zS=!k=aLf;JNHY|hp;u2+owm|Ztu6UV?abugU*MoA+jp&*Nv$g%GDU*G!NCF0<;SZ} zTFH;T`ZS(R#~1kJ;(Rz&pC9;RTt({nPhD2w({S+q*H6{^!E82r6DMh2GU zwztv0%HcbF*{r@@KB%`%(p|oT0eZ-8?;q|~e#Iuv-b7bdiPrEzNic7^qzM=LOJ@V6 zviu)|;cPS+jNTN<^5$a`1BJ6U`s1hODhYrb3*+r)^Y7K}fA;G8%Iy6+^7mttMOCh^ zk3aMxsO0hAX7_#T&Ce=*+_?46D%x9q1eczf*!uP6%O0=B^81Cv{`>!?Fg*z%+lXq;4wGfC==?cJyoA{^d?QB z_EVGS=eMYGYYpT7)8f(Gt-q|vme|;DA9qCF!_VOCjH7(o}?QT|ID4w^ka zcB?0L@9&T{!dvIdD$8}NvobELDr@s7zHSy}mUoF>7G=>yU8i-VljwDOQPP!V)5TSu zH&IhJZB#Xj`E__ZB<<2-goMe^%{B8T2f{ zgCqC!!u;Xnxhw~9`uuo%%L4yks7`lKEq(U7p}(g7flAP{AE43XM`(2Mw`g?TF3Lr& z%d%{vmj9oZT^F_WGKrRTo)wE_U3XE_Rf$etx7Vihd=m-vQ$T1(ou&fyF82Li?VHXzNmDW=k(gR%_@2wK~YxA?sa?1 zGLGXqr@)Fb(>VnfM|FJDMI}uEl#)z$b)1wX1*)q$s#4ui=@Z`B>-OR(EYdb!c2!IMl?frE_2P)VIXP3L3$3fNEu$>y>Y_@?SzTsj z5mk#!FZ3eegywm69qS@*=enBfBeFw&=(f>%KBxS)S}*BuyKJL^{A@Kboqmt%C@Sf1 zRnRZ)0E^))iTLAsy5Ma&Qwnd8mmj4T@yvCc#$5#kI*R+%3oAaB`xYY z(KYd{UQ~pE?v?~tQb%1?=$mFqbSZV+(F;1VeS|NBchPonn-nD#SwciD$dy(%ao1i` zuH*WKn3C(7EH#DhTKcdgG8V7fn>ST6FBet5h{;uAS(dkTOO^8C)eeX#v&`u{c7Xijz;I_XJ_N{(dD~8 zZ4x|IAL%mxowfhC{1G{y{5?6Znwu^uZm6#>%i?NDa*~(nvZ*@K z@db(TvL!v$B%$>Yx>CM#B1BE46fbikZeGS+sf)6mcgrT#`4uT!QB-kxl_$lrD=FPc zxtMqH>-LH`UgUY3v`LrJ5>2EpSskzECGoc|x{9hSZ)rv)*^Tm~Ov-Fdl}4m|-5w1r zBv?riC3!+MQjs=YEfUmuOjA%w6;|ZrtuDJv$6Z#F63-Xp;}QCoqz!RggJ?_Y?G5jj}$XDrJ%U6S~!H{fu`ifpiPtiBl z@h2FtWWw`DvQ1v_N4}B6pqDu|&z_my)|6v3Y=JH+jsr2UU-V-BL|5S|eWI(3hGXyO zUbfDU23x~1$4HNmGv1N`k3W1X+ht=EIfZ4781M!t@*DUz@1Oh{oevcI;_x!S!JZSe 
zFu;I#g$v}IcL8*E(=_M9dwek0%PIQiw6hMX0}oZ%!`tH4gd-U+bg;Mat)2cG;kgzKI% zW`LOrG+=Mp8Rnzs7(f{^+uLH^J9*5=yR7L0gAMFC_v|xU zQwE<=nwCq%FyTQ!;AuE=&N$=6zmvo6z=_K#=h22#a?0_zq@3ABj&bA`#SJnej}#-n zlHESUI0f%PVc~=z53V{1jX#>coB%E2rEmp?1NJk*p2Oy^VGFtQtO!!vT;>)ZS$Q}{ zrWoGTeU9pwd3)}xiB*j~fP=t`t~rT~+k^wXdcQ`%p6M$F99S>mqFWeo2I|3NXKce& zgAhz#LxT}C2b#j10N9W)rUyLaH=qLeg^IlgYeeAKYTt(j<1z4(!a(l;xE{~}aVEQH z54bAV7&?~B=Ug3J0DJ2XDfj3OC=tGulPWJ0`4NB&q+bf-39pL@@bUoPJmDoMqskE@ zQpdt9t(tB^*M#`@eoa_&41D0)mo(6GXk?-U7R?yB<`Uq?*#IQtxlRlw(?MuHoL`R7 z$vp}Xf*=ZvK=*+xGAH`V!drr^OX$650vHYl;Gzl1#|+@$xDx`hNO8CxKZkmp0((&u z4gtLYx60G7)u+yTQAi)^5w5!fUQ@X~g4C>*3SI$4 zu{Atr-h<2Xy^%7;Fo=1^qKd;X=#=zoi~}Gq+G?;hkY0;%4s%W*c$l;>*nwYo$O^yY z4UXlQzqh@u3Al;|FY}p@D!>7P$Rx%0a2`V^;x+=&In1lWq4Zkt3Mq#!haz-ddKxh7 z8^0I+p~z%%GrcXDFx2BT8nSL0=`?)}4M(i^GH!$!6uk$k zCsT@b7!uGGy)->;@u6qoPp~y+?4=P~ndPk9paixs%OSlCf^#zEU^1|nJo~;q9E11% zFml_zfwC=!_U4rE4`OO%I){iePw|Fe>tYzRFtm8nS0PFh9$Fa(g8X2>WGKQc$BF7~ zf%nof4|5I%aT`>TE6-YKYG_FoldF@tkCmvmH5mt;V5W$MW8M{%g?Y$G7=%KjleIyH z?_MW{mxMjq6&zC1zoB0;6}ZsTME@*0osb1>7jX1>yae_p>;gQ-$U=q^>0>m)K1@F- zT_z-&i;Tg+)^sR-K}a~|3eNgA2Jv*Q34E|%nMLAcj!aIDeHYdXj^0-ssS;T zK^UwH+m}ql24oJhYWiL~oB?~;B&3xZ=?zN8cnbqEl41E03O~(-Py8C4GxnH#u*`~R>1tP&lPHsWk(oLI;~s=l)PVMiVfziQ844q)tv4ogI(`uY9BlDki^9V)B2S^MDQWfrMa~UF z7L;3hgUl;*xwnPgDlD*GB*Q=9Wy3TnvM9nV(-SsSaXje62zxCsLTj*6k5*(LjA|-u z#jfv8Uiu&A?WaC8nw$&2(Bqx5VWUF{c$wM^Mtzx0mF)Hy>txh3hh96H0DD2RpQ*rH zFbSKcSK(yGNNr=f@|d}UhzDDj&>UHa@LD-L@L^9gPkEFr$%u-CaT8wkdOx}h*o&U4 zkU2;S?sFIhgDK~OEoWU%nrF2=9n1WM!vqwxmqAB`fg8sQa!_E6Q0{} zT8Gx$V?RCz?+@oWvdu^<71_jzk(09!Ie3vD0RbKGU~4KhBxzSRXfz~6cJ!b`m~DDn zIPL?L-P2w>=Dk*SAR}Vx<^0Uyc)NeJ^I>EuagwhM02z4CW0dufnc?`CE>O zaAeUpcF2!YVc|OL?qlq=VLNl;Hy-LYYXB|-2X7xL9NWpUhhW(2>Uadbm)sktLMS$H z6)Ba#B)>AL%Xy&46Khd#E9_S8pgAHVGVo#!uqqteUz2qFb?zKSIw97>?q1|mMQH5nWZ zpJYt#Wz)&!zT%ycnY`~JXfEJtUqj=G@E-CgGXmg(M$2)H_hDNJC`zZ5v$Rk#y{)PA zAsiJrnM}h`7-JzPSte}sHXQ8XG*pQuxNrVRycUZHq@_F0OInQjd<~Z55Mc-h-$mk)bL+a39 Vr7$2_KmGLP{{YeZet0b@003w5kFNj# literal 0 HcmV?d00001 diff --git 
a/bp_test/out/read_genbank.out.1 b/bp_test/out/read_genbank.out.1 new file mode 100644 index 0000000..bc821ee --- /dev/null +++ b/bp_test/out/read_genbank.out.1 @@ -0,0 +1,117 @@ +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +DB_XREF: taxon:4932 +CHROMOSOME: IX +MAP: 9 +FEATURE: source +LOCATOR: 1..5028 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 
10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaaga
ccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttct
atatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat +--- +PRODUCT: TCP1-beta +PROTEIN_ID: AAA98665.1 +DB_XREF: GI:1293614 +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. 
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +GENE: AXL2 +FEATURE: gene +LOCATOR: 687..3158 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 
10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatc
agttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: AXL2 +NOTE: plasma membrane glycoprotein +FUNCTION: required for axial budding pattern of S.cerevisiae +PRODUCT: Axl2p +PROTEIN_ID: AAA98666.1 +DB_XREF: GI:1293615 +TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . 
+SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagc
caagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: REV7 +FEATURE: gene +LOCATOR: complement(3300..4037) +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. +REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. 
+TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- +GENE: REV7 +PRODUCT: Rev7p +PROTEIN_ID: AAA98667.1 +DB_XREF: GI:1293616 +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +LOCUS: SCU49845 5028 bp DNA PLN 21-JUN-1999 +DEFINITION: Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p(AXL2) and Rev7p (REV7) genes, complete cds. +ACCESSION: U49845 +VERSION: U49845.1 GI:1293613 +KEYWORDS: . +SOURCE: Saccharomyces cerevisiae (baker's yeast) +ORGANISM: Saccharomyces cerevisiaeEukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;Saccharomycetales; Saccharomycetaceae; Saccharomyces. 
+REFERENCE: 1 (bases 1 to 5028);2 (bases 1 to 5028);3 (bases 1 to 5028) +AUTHORS: Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.;Roemer,T., Madden,K., Chang,J. and Snyder,M.;Roemer,T. +TITLE: Cloning and sequence of REV7, a gene whose function is required forDNA damage-induced mutagenesis in Saccharomyces cerevisiae;Selection of axial growth sites in yeast requires Axl2p, a novelplasma membrane glycoprotein;Direct Submission +JOURNAL: Yeast 10 (11), 1503-1509 (1994);Genes Dev. 10 (7), 777-793 (1996);Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, NewHaven, CT, USA +PUBMED: 7871890;8846915 +SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/out/read_genbank.out.2 b/bp_test/out/read_genbank.out.2 new file mode 100644 index 0000000..362f142 --- /dev/null +++ b/bp_test/out/read_genbank.out.2 @@ -0,0 +1,52 @@ +ORGANISM: Saccharomyces cerevisiae +DB_XREF: taxon:4932 +CHROMOSOME: IX +MAP: 9 +FEATURE: source +LOCATOR: 1..5028 +ACCESSION: U49845 +SEQ: 
gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggc
atggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaattaatgtagcatttta
agtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgat +--- +PRODUCT: TCP1-beta +PROTEIN_ID: AAA98665.1 +DB_XREF: GI:1293614 +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +ACCESSION: U49845 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +GENE: AXL2 +FEATURE: gene +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: 
atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcg
atatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: AXL2 +NOTE: plasma membrane glycoprotein +FUNCTION: required for axial budding pattern of S.cerevisiae +PRODUCT: Axl2p +PROTEIN_ID: AAA98666.1 +DB_XREF: GI:1293615 +TRANSLATION: MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: 
atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcg
atatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: REV7 +FEATURE: gene +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- +GENE: REV7 +PRODUCT: Rev7p +PROTEIN_ID: AAA98667.1 +DB_XREF: GI:1293616 +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: 
tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/out/read_genbank.out.3 b/bp_test/out/read_genbank.out.3 new file mode 100644 index 0000000..42b371b --- /dev/null +++ b/bp_test/out/read_genbank.out.3 @@ -0,0 +1,31 @@ +PRODUCT: TCP1-beta +PROTEIN_ID: AAA98665.1 +DB_XREF: GI:1293614 +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +ACCESSION: U49845 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +GENE: AXL2 +NOTE: plasma membrane glycoprotein +FUNCTION: required for axial budding pattern of S.cerevisiae +PRODUCT: Axl2p +PROTEIN_ID: AAA98666.1 +DB_XREF: GI:1293615 +TRANSLATION: 
MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgcctt
ctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +GENE: REV7 +PRODUCT: Rev7p +PROTEIN_ID: AAA98667.1 +DB_XREF: GI:1293616 +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: 
tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/out/read_genbank.out.4 b/bp_test/out/read_genbank.out.4 new file mode 100644 index 0000000..c2341e6 --- /dev/null +++ b/bp_test/out/read_genbank.out.4 @@ -0,0 +1,18 @@ +TRANSLATION: SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM +FEATURE: CDS +LOCATOR: <1..206 +ACCESSION: U49845 +SEQ: gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgta +--- +TRANSLATION: 
MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML +FEATURE: CDS +LOCATOR: 687..3158 +ACCESSION: U49845 +SEQ: atgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgcctt
ctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtg +--- +TRANSLATION: MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF +FEATURE: CDS +LOCATOR: complement(3300..4037) +ACCESSION: U49845 +SEQ: 
tgaatagatgggtagagaagtggctgagggtatacttaaaatgctacattaatttgattttattttatagaaatgtatacccacctcagtcattcgactacactacttaccagtcattcaacttgccgcagttcgttcccattaataggcatcctgctttaattgactatatagaagaacttatactggatgttctttctaaattaacgcacgtttacagattttccatctgcattattaataaaaagaacgatttatgcattgaaaaatacgttttagattttagtgaattacaacatgtggataaagacgatcagatcattacggaaactgaagtgttcgacgaattccgatcttccttaaatagtttgattatgcatttggagaaattacctaaagtcaacgatgacacaataacatttgaagcagttattaatgcgatcgaattggaactaggacataagttggacagaaacaggagggtcgatagtttggaggaaaaagcagaaattgaaagggattcaaactgggttaaatgtcaagaagatgaaaatttaccagacaataatggttttcaacctcctaaaataaaactcacttctttagtcggttctgacgtggggcctttgattattcatcagtttagtgaaaaattaatcagcggtgacgacaaaattttgaatggagtgtattctcaatatgaagagggcgagagcatttttggatctttgttttaa +--- diff --git a/bp_test/test/test_read_genbank b/bp_test/test/test_read_genbank new file mode 100755 index 0000000..1ad3909 --- /dev/null +++ b/bp_test/test/test_read_genbank @@ -0,0 +1,23 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -i $in -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -i $in.gz -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -i $in -k AC -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -i $in -k AC -f CDS -O $tmp" +assert_no_diff $tmp $out.3 +clean + +run "$bp -i $in -k AC -f CDS -q translation -O $tmp" +assert_no_diff $tmp $out.4 +clean diff --git a/code_ruby/Maasha/lib/genbank.rb b/code_ruby/Maasha/lib/genbank.rb new file mode 100644 index 0000000..1bff05d --- /dev/null +++ b/code_ruby/Maasha/lib/genbank.rb @@ -0,0 +1,372 @@ +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This software is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

require 'seq'
require 'zlib'

# Error class for all exceptions to do with Genbank.
class GenbankError < StandardError; end

# Reader for Genbank flat files (plain or gzipped). Entries are read one at a
# time from the wrapped ios and each feature of an entry is yielded as a hash
# holding the entry's base keys, the feature's qualifiers, :FEATURE, :LOCATOR
# and :SEQ (the subsequence the locator points at).
class Genbank
  include Enumerable

  # Class method allowing open to be used on (zipped) files.
  # See File.open. With a block the Genbank object is yielded and closed
  # afterwards; without a block it is returned and the caller must close it.
  def self.open(*args)
    ios = zopen(*args)

    return ios unless block_given?

    begin
      yield ios
    ensure
      ios.close
    end
  end

  # Helper method to return an ios to a file that may be zipped in which case
  # the ios is unzipped on the fly. See File.open.
  # NOTE: the `private` keyword does not apply to `def self.` methods, so this
  # helper has always been publicly callable; it is kept that way.
  def self.zopen(*args)
    ios = File.open(*args)

    begin
      ios = Zlib::GzipReader.new(ios)
    rescue
      # Deliberate best effort: anything that is not gzip data is read as
      # plain text. The gzip probe consumed some bytes, so start over.
      ios.rewind
    end

    new(ios)
  end

  def initialize(io)
    @io    = io
    @entry = []
  end

  # Method to close ios.
  def close
    @io.close
  end

  # Iterator method for parsing Genbank entries.
  #
  # hash_keys, hash_feats and hash_quals are optional filters: hashes keyed by
  # upper case symbols (e.g. :LOCUS, :CDS, :GENE). A nil filter means
  # "keep everything". The defaults make a plain `each { |record| ... }`, and
  # thereby the Enumerable methods, usable.
  def each(hash_keys = nil, hash_feats = nil, hash_quals = nil)
    while (@entry = get_entry)
      keys = get_keys(hash_keys)
      seq  = get_seq

      GenbankFeatures.new(@entry, hash_feats, hash_quals).each do |record|
        keys.each_pair { |key, val| record[key] = val }
        record[:SEQ] = Locator.new(record[:LOCATOR], seq).subseq.seq

        yield record
      end
    end
  end

  private

  # Method to get the next Genbank entry from an ios and return this as an
  # array of lines, or nil when there are no more entries.
  def get_entry
    terminator = "//" + $/
    block      = @io.gets(terminator)
    return nil if block.nil?

    block.chomp!(terminator)

    entry = block.split($/)
    return nil if entry.empty?

    entry
  end

  # Method to get the DNA sequence from a Genbank entry and return
  # this as a Seq object.
  #
  # Raises GenbankError if the entry has no ORIGIN line (the backwards scan
  # used to spin forever in that case).
  def get_seq
    origin = @entry.rindex { |line| line =~ /^ORIGIN/ }
    raise GenbankError, "No ORIGIN line found in Genbank entry" if origin.nil?

    seq      = Seq.new
    # Sequence lines look like "       61 gatcctccat ..."; drop blanks and counters.
    seq.seq  = @entry[(origin + 1)..-1].join.delete(" 0123456789")
    seq.type = "dna"
    seq
  end

  # Method to get the base keys from Genbank entry and return these
  # in a hash. Only the lines before FEATURES are considered. Values of keys
  # occurring more than once (e.g. REFERENCE) are joined with ";".
  def get_keys(hash_keys)
    keys = {}
    i    = 0

    # The nil check stops the scan at the end of an entry without FEATURES.
    while @entry[i] && @entry[i] !~ /^FEATURES/
      j = i + 1

      if @entry[i] =~ /^\s{0,3}[A-Z]{2}/
        key, val = @entry[i].lstrip.split(/\s+/, 2)

        # Filter on the complete key name, not just its first two letters.
        if want_key?(hash_keys, key)
          val ||= ""

          # Continuation lines are indented deeper than any key line.
          while @entry[j] && @entry[j] !~ /^\s{0,3}[A-Z]/
            val << @entry[j].lstrip
            j += 1
          end

          if keys[key.to_sym]
            keys[key.to_sym] << ";" + val
          else
            keys[key.to_sym] = val
          end
        end
      end

      i = j
    end

    keys
  end

  # Method that returns true if no key filter is given or if the given key
  # is present in the filter.
  def want_key?(hash_keys, key)
    return true unless hash_keys

    hash_keys[key.to_sym] ? true : false
  end
end

# Iterator over the feature table of a single Genbank entry (array of lines).
class GenbankFeatures
  # A feature key line: five leading blanks followed by the key. Keys may hold
  # digits and apostrophes (5'UTR, 3'UTR, -35_signal).
  FEATURE_KEY = /^\s{5}([\w'-]+)/

  # Start of a qualifier, of a new feature or of a new top level section;
  # any of these terminates a run of continuation lines.
  BLOCK_START = /^(\s{21}\/|\s{5}[\w'-]|[A-Z])/

  # Start of a new feature or of a new top level section; terminates the
  # qualifier list of the current feature.
  FEATURE_END = /^\s{5}[\w'-]|^[A-Z]/

  def initialize(entry, hash_feats, hash_quals)
    @entry      = entry
    @hash_feats = hash_feats
    @hash_quals = hash_quals
    # Line cursors are per instance. As class variables they were shared by
    # all entries, so every entry after the first was scanned from the
    # position where the previous one stopped.
    @i          = 0
    @j          = 0
  end

  # Method that yields one record (hash) per wanted feature holding the
  # wanted qualifiers (upper case symbol keys) plus :FEATURE and :LOCATOR.
  def each
    @i = 0
    @j = 0

    while @entry[@i] && @entry[@i] !~ /^ORIGIN/
      if @entry[@i] =~ FEATURE_KEY && want_feat?($1)
        feat, loc = @entry[@i].strip.split(/\s+/, 2)
        loc ||= ""
        @j = @i + 1

        # A locator may wrap over several lines; glue them together.
        while @entry[@j] && @entry[@j] !~ BLOCK_START
          loc << @entry[@j].strip
          @j += 1
        end

        record = {}
        get_quals.each_pair { |qual, val| record[qual.upcase.to_sym] = val }
        record[:FEATURE] = feat
        record[:LOCATOR] = loc

        yield record
      end

      # Jump past the lines consumed for this feature, else step one line.
      @j > @i ? @i = @j : @i += 1
    end
  end

  private

  # Method that collects the qualifiers of the current feature starting at
  # line @j, and leaves @j at the first line after the feature. Values of
  # qualifiers occurring more than once are joined with ";".
  # NOTE: only qualifiers of the form /name="value" are recognized.
  def get_quals
    quals = {}

    while @entry[@j] && @entry[@j] !~ FEATURE_END
      k = @j + 1

      if @entry[@j] =~ /^\s{21}\/([^=]+)="([^"]+)/
        qual = $1
        val  = $2

        if want_qual?(qual)
          while @entry[k] && @entry[k] !~ BLOCK_START
            val << @entry[k].lstrip.chomp('"')
            k += 1
          end

          if quals[qual]
            quals[qual] << ";" + val
          else
            quals[qual] = val
          end
        end
      end

      @j = k
    end

    quals
  end

  # Method that returns true if no feature filter is given or if the given
  # feature is present in the filter.
  def want_feat?(feat)
    return true unless @hash_feats

    @hash_feats[feat.upcase.to_sym] ? true : false
  end

  # Method that returns true if no qualifier filter is given or if the given
  # qualifier is present in the filter.
  def want_qual?(qual)
    return true unless @hash_quals

    @hash_quals[qual.upcase.to_sym] ? true : false
  end
end


# Error class for all exceptions to do with Genbank/EMBL/DDBJ feature table locators.
class LocatorError < StandardError; end

# Resolves a feature table locator string (e.g. "complement(join(1..5,9..12))")
# against a sequence and returns the subsequence it describes.
class Locator
  attr_accessor :locator, :seq
  attr_writer   :subseq   # kept for backward compatibility; #subseq recomputes

  def initialize(locator, seq)
    @locator = locator
    @seq     = seq
    @subseq  = Seq.new(nil, "", "dna")
  end

  # Method that returns the subsequence described by the locator as a new
  # Seq object. The result is computed from scratch on every call, so
  # repeated calls return the same sequence and the locator is left untouched.
  def subseq
    @subseq = Seq.new(nil, parse_locator(@locator.to_s), "dna")
  end

  def to_s
    @locator
  end

  private

  # Method that uses recursion to parse a locator string from a feature
  # table and returns the matching subsequence as a string. The operators
  # join(), complement(), and order() are handled, as are single positions
  # (SNP locators).
  #
  # The locator is split into its top level, comma separated elements. Each
  # element is either an operator wrapping a nested locator, which is parsed
  # recursively, or a plain range/position which is fetched. The comp and
  # order flags are handed down to nested elements only, so they never affect
  # sibling elements. With comp set the result is the reverse complement of
  # the whole list, which means the elements are also emitted in reverse order.
  # Elements that cannot be interpreted produce a warning on stderr and
  # contribute nothing.
  def parse_locator(locator, comp = false, order = false)
    pieces = split_intervals(locator).map do |interval|
      case interval
      when /^join\((.*)\)$/
        parse_locator($1, comp, order)
      when /^complement\((.*)\)$/
        parse_locator($1, !comp, order)
      when /^order\((.*)\)$/
        parse_locator($1, comp, true)
      when /^[<>]?(\d+)[^\d]+(\d+)$/
        # Locations are 1-based and include both end points.
        fetch_bases(($1.to_i - 1)..($2.to_i - 1), comp, order)
      when /^(\d+)$/
        pos = $1.to_i - 1
        fetch_bases(pos..pos, comp, order)
      else
        $stderr.puts "WARNING: Could not match locator -> #{locator}"
        ""
      end
    end

    pieces.reverse! if comp
    pieces.join
  end

  # Method that fetches the bases of a 0-based, inclusive range from the
  # sequence, reverse complemented if comp is set and prefixed with a blank
  # if order is set. A range starting beyond the sequence yields "".
  def fetch_bases(range, comp, order)
    bases = @seq.seq[range]
    return "" if bases.nil?

    piece = Seq.new(nil, bases, "dna")
    # NOTE(review): relies on Seq#revcomp changing the object in place, as the
    # previous implementation did - confirm against seq.rb.
    piece.revcomp if comp

    order ? " " + piece.seq : piece.seq
  end

  # Method that splits a locator on the commas that are not enclosed in
  # parentheses: "join(1..2,5..6),9" becomes ["join(1..2,5..6)", "9"].
  def split_intervals(locator)
    intervals = []

    locator.split(",").each do |chunk|
      if intervals.empty? || balance_params?(intervals.last)
        intervals << chunk
      else
        intervals.last << "," << chunk   # comma was inside an operator
      end
    end

    intervals
  end

  # Method that returns true if the given string holds as many opening as
  # closing parentheses.
  def balance_params?(locator)
    locator.count("(") == locator.count(")")
  end
end


__END__
diff --git a/code_ruby/Maasha/test/test_genbank.rb b/code_ruby/Maasha/test/test_genbank.rb
new file mode 100755
index 0000000..973ee89
--- /dev/null
+++ b/code_ruby/Maasha/test/test_genbank.rb
@@ -0,0 +1,41 @@
#!/usr/bin/env ruby

require 'genbank'
require 'seq'
require 'test/unit'
require 'pp'

class TestGenbank < Test::Unit::TestCase
  def setup
    seq  = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", "dna")
    @loc = Locator.new("", seq)
  end

  def test_Locator_with_single_position_returns_correctly
    @loc.locator = "10"
    assert_equal("a", @loc.subseq.seq)
  end

  def test_Locator_with_single_interval_returns_correctly
    @loc.locator = "5..10"
    assert_equal("gatcaa", @loc.subseq.seq)
  end

  def test_Locator_with_multiple_intervals_return_correctly
    @loc.locator = "5..10,15..20"
    assert_equal("gatcaataacag", @loc.subseq.seq)
  end

  def test_Locator_with_join_multiple_intervals_return_correctly
    @loc.locator = "join(5..10,15..20)"
    assert_equal("gatcaataacag", @loc.subseq.seq)
  end

  def test_Locator_with_complement_and_single_interval_return_correctly
    @loc.locator = "complement(5..10)"
    assert_equal("ttgatc", @loc.subseq.seq)
  end

  def test_Locator_with_complement_of_join_reverses_interval_order
    @loc.locator = "complement(join(5..10,15..20))"
    assert_equal("ctgttattgatc", @loc.subseq.seq)
  end

  def test_Locator_subseq_called_twice_returns_same_sequence
    @loc.locator = "5..10"
    @loc.subseq
    assert_equal("gatcaa", @loc.subseq.seq)
  end
end


__END__
-- 
2.39.5