From 45cc6eab58cfc1d5af693a10f70de2ce18985286 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 8 Feb 2011 20:38:46 +0000 Subject: [PATCH] added read_sff biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@1262 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/read_sff | 75 ++++++++++++ bp_test/in/read_sff.in | Bin 0 -> 17512 bytes bp_test/in/read_sff.in.gz | Bin 0 -> 8641 bytes bp_test/out/read_sff.out.1 | 90 +++++++++++++++ bp_test/out/read_sff.out.2 | 9 ++ bp_test/out/read_sff.out.3 | 9 ++ bp_test/out/read_sff.out.4 | 9 ++ bp_test/test/test_read_sff | 19 +++ code_ruby/Maasha/lib/sff.rb | 225 ++++++++++++++++++++++++++++++++++++ 9 files changed, 436 insertions(+) create mode 100755 bp_bin/read_sff create mode 100644 bp_test/in/read_sff.in create mode 100644 bp_test/in/read_sff.in.gz create mode 100644 bp_test/out/read_sff.out.1 create mode 100644 bp_test/out/read_sff.out.2 create mode 100644 bp_test/out/read_sff.out.3 create mode 100644 bp_test/out/read_sff.out.4 create mode 100755 bp_test/test/test_read_sff create mode 100644 code_ruby/Maasha/lib/sff.rb diff --git a/bp_bin/read_sff b/bp_bin/read_sff new file mode 100755 index 0000000..a14048e --- /dev/null +++ b/bp_bin/read_sff @@ -0,0 +1,75 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2010 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Read SFF entries from one or more files. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'biopieces' +require 'sff' + +casts = [] +casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'mask', :short=>'m', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'clip', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) + +bp.each_record do |record| + bp.puts record +end + +num = 0 +last = false + +if options.has_key? :data_in + options[:data_in].each do |file| + SFF.open(file, mode='r') do |sff| + sff.each do |entry| + entry.mask if options[:mask] + entry.clip if options[:clip] + bp.puts entry.to_bp + num += 1 + + if options.has_key? 
:num and options[:num] == num + last = true + break + end + end + end + + break if last + end +end + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_test/in/read_sff.in b/bp_test/in/read_sff.in new file mode 100644 index 0000000000000000000000000000000000000000..59f742e9acef99f00bea07eb246e5de888ba639f GIT binary patch literal 17512 zcmeI4S&&`Vb)NU>i|)Q-_Z@mf&yA6hK>#EuiV94p)e=F~HYNTfnhq$HA(KvJSCla`appj2MsxbomiQhrFB?_1|K z08&zwqe`mskkh#5>|yP-);{N+|KDrxeR2EM<>i!8#)o6(4^ry*G>LNdOxlqBVs>a| zcHjU1{6BOLW@q-rlS-4RhsQRUINKbl2|pAADf{AqtMB?sPs?Qa!Dv zT6!$4q@i?`{9yWfly#-GbdvH)$IqpoNlogfvyEwN>3vT52C({eUR zO*LIled$bEqU9=5ivH;i&WlLDkgkxc(5J4j-%S4${S`d>+ejS7>JUAs zrRUNJGQH^ncc_+@X}N+_Z`#k9JCVIBT_#mUrbvIAX#fkAbe!wC^g*mQ(vPK=(6}+Z zn!b_#BsH`Dk^0fJf^=UhWt-92!h_A}Z&6-Or_y$`&C~BDQh%IIQ`du*?~+?eok(2d zo_`a2_tVE(dL_GuYi~M>g%u=g=?&WVV)X^`uXc2yy-uo{UPfaHYu29S<8$mqTPgA_ zY}SxCfLDuH`3uU=Vz7(i1e_u!Yw*YWyA?p&R}1ISm?0(aWyeV*rB$M!{@LB@B=`b*Rl z(i61q$$o|YoTjvz-r}rh75rMo*EQ;k>7Q|T&QLywm)+!_O;3}nrr#lVi2M_@|L1tx znSLsrz}Hs#7wIAN&(oW3dhv(pBjif7@5aB*^eOIGH+?^mCiosLXTOEjpFrZn^rx5S z`Jeg5T}aieqtL;pkQID;Qr6MIsX#uZ_#|qnrb~sywa{Ubx(;g1QJQ76pj>%m_#~QS zl!Y!zI*{p51CrF*UVEWAipk+eqK(?y%G4XO9l0eW(2Tp0#e#org;=M#I`tk2s<4WC z;Sxofpa20AsKg~aP=k6oVGrTxV85Bya|o=v(f5_!-tH;W|4tH8nXhGBPpJ z6GvZvoy^4K)b#YGO`A4zY{?H&n>JIlX>xL!gJ#oH6O$fhr>9I!aEx-0qPl(LHG=~K zqobBgPEtQUF+D_jViFA@raJNy(T>c>NPmAnB^y0*Rh!zSI6D&3Q=7CmRj=!Ky*^g2 z*9QmjD%Yjj^;CWx8XB6I;M6|)hljB-tRWNq!$U(gj%LMk&2MSWLsPtY_nE!HLo<)= zdE_7ki+s+2Gg&&#E3iVY5O3(Wcz-*<0T7(pWzt1zi@a73aozwLRmpG2_JS;B+Q>CW z`Sxzjo&bL~r0--$V69Hj&KTG7Eam;+(>ZK) zQGOmRhZ*PGp8W<0QcV8^v>w31k6^Wgq@h`XsY_B>i1#PQbJOk(#UE z;wQ23@6#AEt7!*qYskF>p3URqUqkk1sHvydXd!d`V|rBra~pxUt@J$7o#@u{YU z(*fmgmu zNg@3_->5QwN;?j^jP5gd&;xI@@bas8ROc>DBhkb6bOYa;3*5^pWi7q~N9o-L?$vO5 zg#KT`tB>%lIG$CxoAqo{`fWUUh;xg(cUB$(!hzxfm@P!79GK~@dCobS7XWbTC=tAY zZ3PhtG?#0n0*XTeZ3O!uBotFF!O2DlQKd>0Nf_B;!;sNu0oM&>d4s-^{ 
z$p(P8aa|(FOKzbX3SPn?lD&=t9ER}|bM@AZro>pUIWPE&dY;0^~WN4__(^DH7-Z%=0 zkR2QvLc!?Bc)8ph936#{B&Cgle}8{(Z?CvNu(8!@wpwsW9YPpFW{}cWP6`UNXhcQJ z2EC)Bv}}>+?;jlOr(J(bnOd{SslUHg#ld=CvsvZn?QJ!iO~kZSqfM)iqtR&Ck4mM& z)uUF+ubi9u)!(n11O0t{B%958Z^+at#Y(Z*rp!gCgd87tf zI_m#6`8uT)TAv~%wiqY$f^Ck|&mf`QD)Nrd^RfIId83yKwpKym=gBRgtruCNDdVsO zFtRs&3te^El^IuG#FifPMjxu+eNW_$VO3pUMQa5=)pH%&da_1o588Y9J58;;{3Yto zAiIcUH+toea<-ZL7ilvAdsc}KYS}JwJ?V1xe)2uk4j_M?vQy;j;~ZK{Yq3Osm-*{W zjqEODitxaTTua>Fv$Q{ng%NC*vmM!|DX-FMns$qD%mB4#g8AksX=S@$rY7|wvysno z_8#<#{fz#zZJ=!n+e65x+sH_}iT)8s5;a~4{U^xP)1~a=F)}bh^@$n>Uqn(a9ONp; zA49`6Ja`R_6>5BXtFWEd#jMHQdkU%7;er+V{TO~3{T@S>=rJ2dA6m1Yro6&k_%dhj zb&>pKB(EaXgQs;Ey^epshWB}V_zE8Oz?mh=m*O69f9TOk{E=ggA?=gTwTUjH-7$<^poV zLXwue#hFVUQIO&d1JosppoIcwpW6_OM}R8gXmHWyfnRhy_l z={Q+Np)9Fu8n(ICX>Z0tJ<6+;L{8 zyb&pRMOuL%Kxxhhu5(gUA`s{ zw{F`;Vtalzv32YA?eCEMf!V34DPyS3Q*hFaXzCVeL#)l4siby2*t%JrmvR0+bs@ER zGXd8v)gk&0>2?`rZX9~$F)+{^o|qULuGebA4^ZIz#5q32 zFe1TCCeblFeQ%X#mnyW6-sUE=(9{qQAiAT z_UAOE^}w-Gp!F(tda)tuHjr?BY=IG~vq^GPm3O*=MseL4u2ZCXkcZ#0_kyowP+)uZ z2})0cPXl~F?CkH5FCr^`m9kOJU3hVo5x<_ij-Hp*5W4jL5E9R!w+E>@i1~%EYP8Tp zf4+dkv#Bd{K5+qy1KBX;E42PP(q+7C;9V8;J51SmG{1tJeqN#GE94uXx{^kGi+EO! zp06FbfW1h z`J5%!Xesvh;`<9|Y``e6b+$L#fqWJDI^UnLEFaB&Gy$X7k`M#dVOCvO9Ny%;CN!NlM7bx%KIS_55?JRnG(nk$3z;-zU90#gF zT45a^mQkoS>_7nU5`lBNkVC|6BuxPS9JUa-6&KBeZ^Ui0oRp<9G{`f2+F-9mt%pK& zi~81~Tk6s^72uER&`DKJJU1yR_%M7{$oags4u$QNosf5^K!8I1@p==!+o^zBIAg2F zb`oztw&r4+V}9*zjW=;jPEO|FA6iVB@J!9yq;JX4E^j0|+*1DbBvNlLASa)^qdd&D z1#dX9^%go=XA>vn#)pT8`Sx-Ik1%`R$%o@6kL>xtq5COt4Z*p4ImBq=G;dXrx3$V> zR}`;jyLdZ`NL)bHci))85#pDSc69$G-nvfO4N&je!8v5QvcJK*YV_bt(h)w3j1^sf zd7byy^`fW1aPh2;#A)8yKHg{1{RQ505zF{yf!Zeg(aVT&iF}#dav)EYd<)CVlzba0 zN4kA@^X0&?YT$64GRN7D>K&mDl3qfph16N*p~N-$OT4WhVXA_aT05?B1TN+nBb6x? z&GWS)*CouYfMq4{@>TNoRrZq`y1>=DxH>!2Bz|(XXEk)$FUQsTEjP2yk?q0GGJc&! 
z9uMJ_Yv~le%c(pEB$v>r@2=e#eHpK*-TTtwj(!sjXYsxZk6aC@awpC(^VLG~2t60S zze?QW3RjVNDEX}s_w_t=;`k}7yh&eb+#6ZWNbfc3Dm)KafHM3vlKB$a<;uh1K{Ydu z>F0Uo(L*`g3Fj!c#JJw4uC|lt=||^TTF$q5SiH}_M_1z-v!&EuCRau0WBA{|+bj6# zJvhM~DszurS*v8X5wFc7ZH!3tkUBe-d5VL4hwOv)@>Qz_`eeZ4ls?Y&1fNd)a9;77 zd>pmNa7K=f97dSOOK9fKWk1RJqCB#W-~c#C3+x6U7eHqbnGEzG>PsgPhk(cd`xJz@ z5l1c{)Ki#aHT)wLAYwZ?!84o?XHnfl>k*PzMNV4q4CEU(=!@i}J9-kzC_=!-T0kyR z(O#z|E{uo=I;|kt=v7-)I~oy(fn`1s7l_b8wJ=RGXhB4G><x9dS~xD><88;2DpvL(1b0XU(6s;09m3@5uY_o}plg&jo5X z@Xps9foH$R`zOZqfe3Y?3}*qv$1iX$WIs!(D-;)*CzlZ%opvTiy+z6xO=eTPdqw8J zhrp^<_77?0%-1MLa}_It;L@M)W^U!J-bl^wV99mX+rSZL73T2h9Dm>D-8~B0GO$&L`Vd|@LssX0)bMweobxNsVqF%nhelEE2goC$!)Pv~e+7b_!b*vp zGa=8=<}y9IhP}_jv&V^W7CF1a;Y9W?;nf;mh**yPpX2Q4_i@Tg>C==S4T;}|wK~zk zj3OnKw2A!=50h)M^6+H_pY9VFz*b_VTITo%$i9J(u70>5XIplS9#&Y5-%9C1`bqlJ z%QNgxzndPw(^AK5+DXrku@cb%HyFEolzczlR`CDNXj8|7&qUs*unMz_&KoHya_>Hl z509ix*-`GB^HHq0F~+EX;e$MtH7|%xZ}wG7{0qc^$Ga` zcm!6__Hs^rhP zC(NNES40A%)HyK~)PN#oWCsOpK|PLzT(r|44{1QJ$kIUlVV+aYN=nFPxWvEkBkzC` z`f8h81$tD3nhw@yqw~;!oC4^Iz=HCPYj6tOV99Kb^aum(G=NtOVT5&B8A#Yt!pJ|l zDx&}zBP;7|?uT)b-l#fU598sTim>}3F5x-zPeZIvG{?t5<#Eb1b`zQcL~75DO>+EQ*Y6B%H8r936xG>Qb3| z1=@v}i%kQ_^}Fm+ud%+eai|dsA8^CyP^^OV4{vncq*jX+6c<^_q+_8a-?_lrO1}#& z&3e7*T8V2e)h4Sj{mn*it2NZu=c3D?M_*rWth=nQxwzuux=P(dz1i$FQD?1%cfV4v zSIPPP%;TB=4l4Y)*_-@y_yc$Er(h9Ga^2V&2IGqbkir>j@xKH&^;2&2;`&8C4qrn4 zkI1?6%$1LCfds}EqLi}~i^w~kHNJ3fmTMQzf;7;w48pE*HMh+5B_x|zSwo@CLXg=8~(7usVq-{EM}Sq^u7>4jr@$F^rET_o*Xj-y_q1INa$jx1$+!ZYW4 zWQ96N=lJ*=JzS4wcfc>KQ_$Xic*net&b+BJU-3AGq%%0iYFEPE%k)B~ ze4dfH`j%sr!6<7j+d?mV0!CF0P*ywV$r;Uj1rP0ye&(wc*v};_@J-A96o2jiOL*!Gn`>3hD>?^v86VxdSHj!t z+~)>-w#+xo9{UWgz!YWlVJX{6OLxLurk!z4BUrgg?enD1Bfr44h&Sp(GW$hl7<~pu z@Y#8+Bj_<2e-8b|X9MtDH?r=U>%hL>jF&0(2^_%>uU({E)pLhm7gu95=U?4N=Wf1L z5F09xH@xQj6PW_+;EY*tj8TlUW3q)u(127U!N>%KoI)aOktit1<$|?x>VREPT0mZ! 
zXp@C}Bu7Xd)KIU%pp(#Hic*xJ6fbgC;v6nm6I>P^L=Z!Xv5S6&J+w!qcBCGw3Naf7 zTm#ZlKpwtCCg2@d7W;u`t0f1}nj_#HVY0%r&<;@m?Sj1o5G)bmbu+hh>%@egx3&7eoyjAdAHvGi)K=q{srDKQ6K!VW@T-`|F-6wnyUgC;iJ3bN z?tc#ju9`Rp;T($V@jcmnj2^qv;f@`=@xAGdj`uPebYIQ{F-DU&+Z9yf0{5tj6@4I_ z*z+o5!UAt)AMHdA_vf4-y+Auh{_Z>0ma{5V-dFKt1EX+PTx;OnFz0zjvCdJsChRK0 z1`rOE&3-jTq{q?}V_$L9xi$9+Y$Vcp00b}sa@2kay=!>i!?@W!!Fv2M<)Y3t>dw)^ zc@oz&E+SV0g`5d{nHF`%z0Q9bw+(P78u)yk+6Jxqv%dg~ne)rsGu1=PH^qJ|=keBP zWn}a>vwIl-8t*uZR$*1a73mY8uBg~UROgJ9D??TK`wV&Kn=16jb%ud#h_;Ta=jc^8 zvTuQ+MvUF;XC9=D`?cL?T#7q-fYEoAJ-Nx|GYP7lRoyk5(sVi`< zSzRX@>f(+RI6JFU<$mP%&nNvIymNCNj;CLE+@;{bYROMhR}kv4j8=T4oh*ivTX*`3bIZo zc^EXrA=nx&$N>>NkFVEFPfiXG zkB<*emOYq;A_5xPI60{}moddz&!A$0w5G-OboS*Vk(uM9-^OCt2Hn-GC(ZETFQ+h<-9TkuMGM= z+~k#q9=Q7h6!?BRqI9m>nXDpjq$9FQdp><3-s+PefMa=A=Vgr&X!8wlqL}RntYhaU zsMMXF2QSu& zyI~-IpK_A(BcQ+YN}r=<5uQ22pE&d)XMS~(-A>+>TJMtOYBR2?(9-DXiy)XxBOkg7 z@0yPBTE3g93tVfZ?#w*^>=(l;trgjC^>uu8bo^zsRf%F6_;U&mzCgV=?^=g@+7__f z3qHCw@d_ntTwT}b3xw`NUmgj$2iKVE9Cyr_%4)2IJpVZR9?3Z~4E7_;Ay|fqW`?*i< zH#>=Jh2Oczd@Z=I!reYet@i|;K(BY84BxpQ+BG!S{r#Rej#%Z6xCZkO?cC$;em+Mf zKgu28w^$vEXfxVuz?1Hy>x4T>tl#ZHYCT3aLUF&r39dz+j{C;W#5l(t=qt1-!rQLy zduNU#d4ZBgv0C8!@?K@(C8q+X4qn>Z^ve zrXK1kAr*w98*~tJc~?SG83fc2;yJJAkvfqM@{vXWUn#=jyGShr$0ZL5oJ5ilj88!3 zHk2D0&`w?=Nx6;rP+;6bRggz$q=IVA&|He(oLkbSYUKu-N-Mg7Q<8>OogJgZ6wT31 zI+s;0_niVgwnkx7l2N!}3z^4H2!-FY3=hJB*2A2)#p}r}Tl|F#KcQJ)=D%6-gHlkRztqXAV-G=|o}8SF<$bP`gM;}> zzp|uqKi1Dvqt5w3PE!%nxQc({V3sbX=it8fG+oMhs2d*YCL^oR9e^=PK!q;BAo=mto#n)?%}#Ko{qErpWmV%O|(u4*h3?04jP3lHzZ1;&wz*U7 z1PHfA{y3i_SZU(%yGc8b=bDiCC=R(3Y4KHz*O2h<4ZO5V~{iFOO8LhYn>@0}xnpBZq&S61Jehn-Bo;CIzkk`+v*=>}%bN33; z6=eHjwa4G?%%Rh|1C*DU`TVn3U%QCk?&dhcSqpVmP`pphR?3^pJl|8)WHEAhlKVYG ziw)T)>0OaN9*Plz(S-Xd;M?pjJaH`KILF;3&ZX6n>cJP|*b}sOC%Us{HAX-DdL=mJ z9CEIg8Id?Qc|O}gjkA!>o;6^Z{I^&K_;#PlX0cM_nY!Zdp5&jP-9@}CrC*@_O>+L) zH^n`3zHc=BG~cj>Jd_0w2=qCo2l~r7Ij*mxIE{gGWsOJxdMPYWT^&YAg(x+8Fab#@ zE3%`=(t_;7k<%N}1%#pvXK4;~AsggnHKm#Dm9$g>1lYak&lJf##Nq(gr8W1&AAN7)}J*NBdbb 
zoTM4~r>)hH0&~F!N*gQ55b{S(7P%~Cw6$Ia$&*p98I)+4Qaz&}91P}Bi4F2rCKP~T zv|&o#Qk`8xc}{P$GqamGz~@byrsMaI(}o4p(-dxDb$|2boMpu8En>XV2zvQHcif_o zN&>aXg~6vwR|PqB0}O7!Dlgu|EU_VdfvcHh2h7hk9Ekp1f3^Ya(hmY1$x zySF?!x%bNB7rf@Fxob~dTDo`7U3>QKq&&~%mGh@hFI~O5cJad8r7KIzYfs*LBTCx4 z^Ly%U>bo$1e(BzWb9e08b62&)odt`LzpI z)>h_r-LvbCU3cx@egEOygJ|FO;KKa5r>?GDor|*fzfj^fjKw<;UmGtzCQS!>2D^ zS$cHoiZ}e;-M91f?%ccc`_+~A?!5UlZ#tHyQQqXxxIh?EoJQ=Q1$$`n`P_&2}Sno JU-BjO_+My8pkDw0 literal 0 HcmV?d00001 diff --git a/bp_test/in/read_sff.in.gz b/bp_test/in/read_sff.in.gz new file mode 100644 index 0000000000000000000000000000000000000000..31e454035d6bf7ecb83b6a86314c88da2859c044 GIT binary patch literal 8641 zcmV;yAwJ$8iwFo%q)|-(19W+G0PS39kX%=FzPEdp-j~<=^z`&>nvr89jbt0!M3^*f zyDi7Uw%oS%ndw=2R%u2f8Oy}1VF^_sfuagX5mHH!5D1krn8izE<5jYeEH8i!B$b6! z{)ABZK_w~wNR;n<=k{pEBL|b>M}Fjuy8FHR?mhS1``$a>{mwa0TaP!J#u#h3Rla77 z@9WqJ*@sM<{iLl|sc1Ty(WgsmNV@(D~C7UzU!U)Zrwk!{rWw-cGpp` z-*lQICTtRB)RcT=2o;0nfviQh1!ps6&!^zrVjS+nLmU4 zFf{uRYV1X?d5j=xPMIR=#LZF6P}DThat5{HW;ebu6ZUd*40{pO31GaQ$)bm_If&my zb2EBRns=CIAhFAwGcTAQ8_)iy$w1N!YNt)m4nk%M8Vs7>LV3uHo6V5cz_>ls`k|S` zxfrDU1V_`R9W|CP=P#nan=r@PhZ)q2nwQW%j$R+f@i|`yq$jW!F{dFh zh~6Ts9p`n7Lt2o>DfB6`a4WQ$LNBkOe1X?!8Ao@RzeBA6#uUc+74$QQ`=JAOfCr7E z80{3~rA-^Y4^oS}%nzVt(LRjcJI&w2B4i<{9dqy*94Dal6PURK{%%G6h?&4lM|nNw zQ5J+=d(mc;=a+HDZ|*_snEfm?okVHGyvqCP4zvlN*9BJ?4sP~r;Wrf zO8j!hXDwQwT3RhpDd#Q@s+DTB!ZWZ@b=j-%M6Ft_q702uR(EG0q$c(8sYwSF?GNSB zNlogC9{g3|A`Eq@6Iw{WH3}4U>$R#J@vkD?iHvGpWs3HcFTHY9B2Bxfw+2#&GEk`^ zda9xoj)R)xx7yd&*IO(W%f%SCbS8m=a&KRM|G>b&Ahu1qVQ*j%MFYLP{n*f~zpvaY zTR)FwuZ*o58}@M8ZF(k`&31RoVQ(+a_mund*e~~%Tg_P?&E*#d#bPFtK}nZvDr&%$ zLfIl~_YFvYeTjtVok)}ti9{~fC#_q3xAycmpU;=e`PD5`C_q|4M9NO4kk3c4d13jE zUc{k3Jb5>+!=cJuSKhe?1ylIU!_HV(0Uzv|+_Un8zKrMB2Ri@@j1{?GxNVKqXDZ2%| zc(5neqF&H^1mzjnr$zMDf$}4evKQ-|t@e*#K?3GqVXd?1;VtMjh?>vgGl{*+%y(dO zI$&=$!=~)Reh8K-2fGzSFCqJJ)Jo#~11P%{8i=h~gkC?y{yos(Q#dDs-Xx?l+qtVHQ?;AFyNzZ4gPvbSe}fp2%=j14PXiN#fdEC67R)P{;ZaB~UM=_TXl%;SF?8oTZFjob0C&s@Ft?t8JanMFE;)ESAUxp^P;X8%dTM!%)Lr<`w 
zi8jU|2p<(oSR!(WsV8V$1Q{Y1@r2eMcHTz`hF0H4KoSHYDhOiv2r>i+K>#0H8NrVh zP!Tk!l!HFj7udiCBoTNbWEq7-c49=Xl^ve4#6rO6p6*boN7MAc>0ZQ?yrX_3OM$Rg-(C$}jRh6LjU!h5`q$;>pm8QhB zYRzGewvwJ#A&c-qr5p$fqDl8$o-0sEpH;;U?L=hMxFaybVz#dlaa1fT3TEm!St#hG z%2bJS@+(ZBqTnX|_Fmjzpn)4O{iR%ANiB6a91I54TzkP_p-?O)ld)8?SnTQPE|p5f zVm|N1V$pn|s~aGK!(2WO0o}!(P{_-5cLS0Hl)5t9;_u{=i=heG}FEwF% zns_1uurA;|u>Wz`!#OaZ-y@G0rR5)ojS@6yqD)rZ0o0h~ND1N_i>y+X&9}?vDf5L41F}XPM=2&={JJfVqxq=}f@SiD2OaP7c;`jwHsu3K;%rSdCj$=5RMg1exCdM(2LCQE8h^*5a z(4q;+NqZIQ1b_oikUxx~hX}qyu*+e5Z?Rt|YY@zkxh_KPC?pB;jiDrke$Qd9=4hE^ zwwm@D$Q9d<)xRBvwN0UB9(6_?Gm?G{#s?k=*0@6XhjElJ%l2bj83;q2BezXr9w&)~ zIs6L7A0R@_19v^tJH)Fu3$&BpDeGbO?nSNhz=9c!`w;XJ_IrRbXKWATL8|>A%EOq2 zr|>Ov9l-H1t|g*47XEY)pRoNrv{!z31{%hInL%DDnS(H|^Cen z0Qyhk%vs!59%m_y(n+$xDb92h{d5TCehHeAru1(&9@-ZL{Eq z@Eh_N!7aaE84b<1lnbK~u<$udavX%HBfz1^MBYL)@j2F|=AZbch-Vco6buDU6cz0g1lKvbm$40cVew$h#otJcU{q(C`}U;~YQh17=hs9vdRN!{Ur!d;DEKy1Mm z71A=rG;$UJGXV`qT7?)QTZKB+6#W!134e|h5j88yuLxZ=#UFMn?d#P|m)tgQ-aNc{ zXlQtNv+VI(c7}#FZ@zScjT^{^aHzpPVA3j^TC)ko8XPR~?8ehtHaK{xbFKmi*Lt3| zp?0fi@Zu&~$(GG}g>t!6NF<_hW7gt{h0Xu3^>}i_fjf4gK>QPn_yqWjPvPku$J2Mt zoWj#27XK)ox(-?#S=CRN@7fYdn)HY@xSpS*-F=!~pmDA~L$u-%+VBRSda?Q|w4YDX z8cpH*oby06=pUQF3U!9hyLjRjQ1UeGx_G$8u@^&q;G2CL>}v=THrr32bQ1O{iw`We z{cY+yPWu(K-Si$FfW5k(nw*E6(?m#-CHmJ<;}ntP0QOFpN7<{eg&4;3C~7=xI_OWF zK#y5lAo9e`ce&mcyZ=7vorbe%(M+Vq|F^3>0L+pxS{7--tHRua{gx7HvcTNJcm~<#PM^H$3dIxWOLP7!g$pGbWI05l z>Z)M)E)02W~h@-l&RH}-VnMJCJ1bF`SFwj&qYWg z9y-z_2UMWqP(<;N!>f_NDZCK(YL~L&gM+|~A=x$$ZGPQ0)T$-^wRH{E;nu6>eATkI zzH#P_P>0St*%5Sjqe`1z-yM5Fi`SK}g_Bh!hBj?P6LHA(6bgkkIAUJI6Y|=69J%s_ z`b{X17{VMq<5OfClX$8Ecv>S^?TQsoU>z*Hr4vu+G@pIp6tY5m618P@|0JHecC^cq zVaQV(gZ=#tJgdSUWIer)v7EDvznsVOEAgUxVZ+6qB~W7$&ukjcvsm}X={Zw=Gl8=n z@FR{D<1~&#M6g+|wWlHfNgR2wObM=Z)6nKA+OY`jaDtX3$rY_fUz7Nq(rEV-tVB2M zT9|#z6F+1xOmCR-8d-sh&55w*D6P51ilVF+$48<{iX#X^eCAY=kPmMnPupP zX5Xrn7Jrv@sNpW$5Ew2HNG}U$IoDx{5Ra-7F!bgQ< z#93!S85ndqruD8m8?C;sYAYxsKvhxC6n#}S;RA{rRSmr;45>2G4LcwP)YWTOa}gdz 
z719Zf8j19}V>*Hz?8?QS*o(qdwI!;u5jVIA-GG4w;}{;kxV#@89v&KgL&N?{ZI>n& zzmRz-_2_$JbXmJGHdH1~w7_CeA~}N+;OSY*JH%`Lz&gBm=f3N&t`IK_p15|bzYgK4 z!3x}dm0pxMp13BCO7sGVeS8$(e)|#H!!A70Um_yN>a?A!-T=z5nzVg*_5$X1#_Ln| z4{6;(u+LAzGPL2De#QJ4Pv#Jw>Mor53VM<_>t(Pb;#C-fMvM6SKAzqE#ES)3-LUyK z?P=8PM(@H|ny^5A)GeCFASZ6VhIOe}*0|kA>m0V*=^xtziz9LCeZ;32bd1s}&6*OP zyzkhpn4u2TS~l;ZO^%|^qxkMdjcNK<9(E`YV+KH!`2fm@_Cm7X{534tIC=>>Psl@P za||Opf&Lx=&K?BAnZmbZI2^WrMSs^4EtahQPvKiuzjvcNXugi}{jBk2pjJC%%;Efg z^f+KnVoqh%{}$MEAM*sV=*yTph#Y@_x-UUTiGD~v&agd!5r)lYFw?T8elNxo$2H8D zzc9OG&F!n2ZDeLWkoG*e%lmMgfwp1j|1;ueyZHu>J^J$sw9Z|Ovs*?z?lc2-KjuyR zQBz!FgfS~QT$QHH!K%m2AU&W7=spNNkD6;CbDl`_g!9ooXZj)Gh`9x&drcYgPr7)@ zN71$)eSU+CtIvD@TI6jS*X9x8dmrYjZnj`F$DrFHBP8d{2>zDM<&e1-HD}OI4kMJ< z;wEsap!pPL`~8q`3;k#Xb01m?quGwJZvl%ti2CnE+1bGL2J~YABsP9PQ9!f9t zv{5gR&oK*zKh&Zt#P}F3QBzI3Z(E)thS>d#vpW})K!ppxh#-x)`xz?nxljm6f>H?< zh_sg5i|vfdRPZ}R2{BfQjZp^`QAzz|nn~bSJ#+v*O;B(-3of02@f=iZGnRu_o=|BS z>WcF#(S%oZz3<#b8lPaip17i3Er!l(H8uSj%23Z%ebuWZSWwe=JA?QND3nnnRnW-0 z#!y7M209eu=qXc|*{Vy7?FfUFigU%0x)8}+_W~>$XQ^uV#<|v1s4}ez$qT8B;%AM5 zP~}~MMxs-xRLXnUdU{}$dx|~WiixzB%YkNKKUc{0^ptygO5OQHyPS3b$W z2e6?#uVIivSB4=PMo=Wk62d+MEtgzO|dFP`9UB93Q2oJd4)bdk^e2du)+*W=UP8?N5Xe1HzdjRTA^ z3x_yDk9Cv&a$y?NTy;)!jo%~&%$gq%zvdbFc-irbB>n3dNDOm5EBr!ovu3$kox*vs zIpWO^@u-R4XNW+PC>3wdbFdU~V#G9LC+Q_hh{We}D%XE$+GK227YWENQnv#>xm^E7kf zLex{EIC6a?f55L^M=Mw*OsfMUjx)ZImaIMd1!gj|!?LG|&JD~*h)#kk44n`?3yyGzm;#fcaZ~WihnqbcPxAZ)qk0L+pOPLWtEUi&Z+L zDPLsQ9H#lYCsa>w{(3mDS@d4 zK2=Y%<60pXvKg1xRjswmFaME#NQ{F4jT!?HHD~PUL#rx!XhK@eg-jFxG%nIYBZY)D zF%gaOlZ`mrubj!b#fStdqvbNw08cAFP90xW=S)*vW5l0MJL<2o{krqt5iQq*SuVjP zHZh)Z$!%z;T$az9CoR8%?TwkuFWCC}hSqw$)<#w?@|b{Ww;k*A#&+!4eGQ|u0s0`~ z%x;>2#kdix#}2dCw}mU;mwa!-YEbfW%FIP-V+6I0zAW{lr(xm5_MF3-(2u7wO*B{miD9#tMJF^t9TZz#R_%NN3}$~B%keRv9k7aT#+8Y%C$&4 zx=3G*e2i^iv&)j7f3<1YG1<`{SGK#z~n6Ly*rp1oYL z3vbJ!wo#rOU)qHKqGY)b_Z^i06V!lTY z*KUn`GY&6a|A(2OcM9uw@#=}^N-Xk%?Lo^+D0#kfPJ zhPn~*iX-ghyz{z6$r>;-_GXky1WsaBXTgR#Fe83OWCCdM9;@>wSNccv9NrI!ckwQY 
zz<%{H{w8aSllUv)ej0@}7eC!TW)RPii7aA;@P1|{#?3VtRm$9pGimy&f{echppC2( zMsbhl=~4YtTg2TG2SWM4ezWA2a(~EOCi&d=7+FC5CR%PWe+zx@0mFV5bUKC7QOsfp z>y=5|C&zIn;dl&J?Jf(9J3jiPjM`)Cki=zmiS6}k_?~Vshb+c#cfTdadTMZ+v)It+-g)aT$pgZX49>Jrn0Y}1O~ zXat6Bty|%fo491JiV_%cGpXx{N!U(GB6?0J2a&*ZZ2|x zsxl2)dq)#Ajzfa6%6@9jDOPJax-QZS@sic58ek{5sppHXv3d(Sbg$(ht1^wCl>qGm zF>G5E#(4oTUa5|slGG@d%Q#UkNoA-}F87PTo{M?UDyO%9wFC`IQoU4=bT1Z{ zKsG#-%9)Z>1w$&ySH4ilW~12@y2z)5!-T`J7}!rLmCBZiS;;|LvpHPJ16OkEapksM zSKojFc|I5EkC>n*E5KN&M6#cE@dkOSN8MUpqVuz~j7MNGJ59iDp{)zxiS=;&80^I) zee1^<%?{$*W9)y1IFhi}F#qUz*wX}Ux8{CHE)Juw_O0msC3;$tWG)hQ6<^d7wDS|@ zMRJ=2%Fi$kbeeodGOSXRAPnkh#``B35gxY#jvp;C0`Og~W;+?_5YM}ZHSuBkl^$gL z{4w-4PdoGkInimmmG5IDIZH7Y&1o`LaVOc+cVNK;X#@`?3jY)}ns(Vt9k6RD(`h9K zK;8?QWN!icA!stg)$vn&r$xN^g1A%Ii_y*>;rvT3gfO5iV-shXsg__|BTWmPX1-g5 z?CTIY*CJ+YiMqd=Yv^&#++n_t6asPtfmC8+U$JlFwQggEcEf(2Z^OJoZ=%)zR8H#9{U+wgBklpyOXGv#?d_U%|<1@Z$C@qO9A`BnC(%rCGmWEkb9Wi z=yB#lH+c^n=Y2BIIQ=qXFt_n;j4+>1RwZx83?$9nz9~o(wwVM@N*-N1up?+Xz%RqA z9688*!4O)?o%sy&#^$-skqq=1zGw3)qx&*52g#cP&`aLZ8ON*$`o5xmQVoH$m;>Eo z^miHcHW4<+Nfdsp2H7oF{%XxNr&sN}x~?>?Pq1)F#v1%ktnjt+kd0;plPFlV@eJ(K zFGhP$9%S`39rs3#k2J+-VzbL7))Y0$QM^)uLV)3`FFuEW{uX9xiWpJIueidLYNll? 
z_|&T{woqyL#WcXJZYB+t6A`MK(&bEfy{6tW&CaSgdta}()ME|lYc#~AGBaRP)l6dL z9d(C0b;Y@=i}y1}tk#OWGXkVR6~PJRF)p9E(#mI6o>X`E=on0uYE(5{jURo{rtZ?7 z6_s=pnn_)Ck)Sus2pb-DLHyNi6T|mIZ)x7(=9$ffKwkiO?mCs)6+xuT#tLVr@PzXP$zJxeZ3#-=9~B2 z$Q7MnNIN|@!f)=w6FNqJ%=>7|#$cE3#MzWBkOejDz4V7SvBHn@4a<{^4NqW=I}04z zWJ>hgJ+U0<&=?tLtMZkI;svU^h;|3Kh(LqMod`_cLQO#`r>v zIminxQz=5cOWLbpFGjiQK0_Z}n%+FIjC1szWbJm^?HP_V#%NH0UbM5${}pl3{83`5 z#DIeI&Kx2iNWzlN^8E|VAliqgFbLhx+AlD|uKukC*Vy7eyN3uRxowiEb{Mv7j&H}v z%3VBgNg%Dh4LeOd-G|gg0dog8B?>MiFeRY*$XLw z%LmrQf9rL~YGu`Et8c2NNymHx((wtVmANR4dwY^-K!aX&iD0XgqE>H_B4!(Fn`*6< z4yN?Vy;!MlS+2ez>fBsPq6lK7F;4l~>YF#}h*JcsYqqoV{4t%N(n@QtE4%^PT!z=c zfWH4IZz<;a?&YRoG1CrW#Jr1*m)ayezshZm!2UNaoNuLM``)ZN@>b@$fYFidzYuLyYi|lcWzrf z(vywJ$?4Ff54?HC!kc5GzF*vMPQwqMDe z9b-4%vTytLk*h~`Tz$=0W9rD<(UrNGv5{*>c8pxLd;3j$RRgwn8XRpb-g|uRxRzc2 zdYM;@U9nzf^X|pP+g6sS+R+(kxvss+TyC70y>8o7<3wZIMRD8ySNd-IWi{Hia~r#t zi?GdIg`LOlZY<87xc839rIqQsrdMQAuiL&ASMQ3Q+b(r3v~%0VtGU4;y7fyJHjvq? z+AnN);Wlv7g$;|1U;p%l4S~4LCogPB9_f4O!iEII;^!}HNN&&UzOdm1s@T6@*zliF T*oQB;eEI(dKaT6WXhZ-2%+2BP literal 0 HcmV?d00001 diff --git a/bp_test/out/read_sff.out.1 b/bp_test/out/read_sff.out.1 new file mode 100644 index 0000000..299cfe3 --- /dev/null +++ b/bp_test/out/read_sff.out.1 @@ -0,0 +1,90 @@ +SEQ_NAME: FQIBXOY01DRIMT +SEQ: TCAGTCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTTC +SEQ_LEN: 279 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 277 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: aaa`[[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMMM +--- +SEQ_NAME: FQIBXOY01AV4UR +SEQ: 
TCAGCTTGAGCAAATTCTTTATCTTTAAAATTAAACATTTTGTTGAAATTACTGTATCTTTAAAACTTAGATTCAATCGCTTCTTTTATTCTCTTCTGATGACACTCCTACTTGATTCGCAATAACTCAATCCAAACGACCAACCAATGTCAGCTAATTCATCAAGTTGTACGTCTAACGGCTTACCTGGTTTCTTCTTCCAGTTCTTGAACGTTTCCTAATGTGTTAAACCAACTTCTAAAGAAATTCAACCACATACGCAATCTTGCTATCTCGTAAATTTAAGTTG +SEQ_LEN: 289 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 69 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: ```````````UUU\\`[[[]Wa]XXMMMMKKIIIRKMMMMRIIYY[[QQNSQ[[^^\]]][[XXRNNMXYZ\ZZZ[[XWXX[[[\[^JJRW\\WVVVV\\ZUUUUUUOOOMMMMVZSSRSSPPPPYXXXWVVWMMMSKMMMMMMMMMSVVVXXXSUOOOO\\UUUSOOOUUWWURSSUSMMMMMRRMMMUUMLIIRPTRRLLLLOOSRRRWWUUUOOMOOOSTTTTQQQKKIIKKKQKKKKKKMMKKKKKKKKKRUUUUUUVVVVVUTTUUURRPOKKKMMMKHKHHR +--- +SEQ_NAME: FQIBXOY01CU7IT +SEQ: TCAGTTCGTAAAAGTGTGATAGATGATGGCAGATGTTATCTCTGTCCGTGTCTAGGCTATCCAAGACAATGGCGTTCAGAAGATATTTACCAGGAAATAAATGAGACGATACAAATAATAGAAATTTAAAATGCGCAAACCTGACCCAGTTTGCGCATTTTATGTTTTACACACGCGAGTAATGTGTTTACTTACGTGTGTTTATTTTGTTGCTGATTTTCAATTGTATATGAATGTGGTTGCACATAAATGCACTTTC +SEQ_LEN: 259 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 257 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: eeeeeeeeeeeeeeeeeeeeeehhhghffgghhhhgghhhhhhhgffhheeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeaaadeeeda]]]ddeeeeeeeeeeeeeeeeeedddedd]]]]dddeeeeeeeeeeeeeeeeedddeeedddddeeeeeeeeeeeeeeeeeeeeeedd\\\ddddeeeeeeeddddddddeeeeeeeeeeeedddddddedddddddddddaaadddddVVRY__]YPPMO +--- +SEQ_NAME: FQIBXOY01CEZSI +SEQ: TCAGAGGTTATGACGTTAAAGCTATTGATGGTCATTCGAACATAACAGAAGCAAGTTTGAAAAGTTCCAAAATATTTGTAATTCCTGAGGCTAACATTCCTTTCAAAGAATCAGAACAGGCAGCAATTGTTAAATATGTGAAACAAGGTGGCAATGTTGTCTTTATTTCAGATCATTACAATGCTGACCGAAATTTAAATCGTATTGATTCATCGGAGGCAATGAATGGTTATCGACGTGGAGCATATGAAGATATG +SEQ_LEN: 257 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 253 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: 
eeeeeeeeeeeeeeeeedddddfhhfffffhfhhhhhhhhhhhhhhhhfeeeeeeddcccceeeeeeeeeeeeecccc```ddeeeedddddddeeddddeedd```eeeeeeeeeeeeeeeeeeeeeeeecccddeeee\\\deeeeeeeeeeeeeeeeeeeeeeeeeccceeeeeeeeeeeeeeeeed\\\ccceeeeeeeeeeeedddeeeeeeeeedddddddeefeddddddddddaaaddddd____^YYY +--- +SEQ_NAME: FQIBXOY01C4ETH +SEQ: TCAGTGAAACAAACACGCAACAATCATTTGCTAATTGTAAGCAACTTAGACAAGTATATCCGAATGGTGTCACTGCCGATCATCCAGCATATCGACCACATTTAGATAGAGATAAAGATAAACGTGCATGTGAACCTGATAAATATTAAACAACAAGCGAATTGAATTCAAATTGTATTTAGCTTTATGCACTAATCACATAGTAAATAATGAGGGAGATTTTTTAGGCATGAGCAATCAATTCAAAAG +SEQ_LEN: 249 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 248 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: eeeeeeeeeeeeeeeeeeeeeehgffaaahheggeddddddddegggfgeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddeeeeeeeeeeeeeeeeeedddeeedddddeeeeeeeeeeeeeeeeeeeeeeedd\\\adeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee`_____aaaddaaadddddd[[[[dZZZZ^ +--- +SEQ_NAME: FQIBXOY01BXD7A +SEQ: TCAGAAAAGCTTTATAATTTTATAATTGCTAAATCTTTTCAACAACCAGTTGGAAGTACGTTCACTTATGGTGAATTAAGAAAGAAATATAATGTGGTTTGTAGCACGAATGATCAACGTGAAGTAGGACGTCGTTTTGCTTACTGGATTAAGTACACGCCAGGATTACCATTTAAAATGTAGGAACAAAAAATGGCAGTTATTATATCAGAAATAGGGATAAACCCATGTCAATAATAGCACGCCTTCGAAAGGAGGTGATTGCTAATGAGTTACACNTTAGTTTG +SEQ_LEN: 287 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 177 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: ]]]XRRRR^^[[[^[^^^^^^[PTRWW```^^^WYW^^_^^]\XXVWWYXXWWXX\a\^^[[[\^^^^\]]\\XXXXVVXPPPVUWW]Z\\\^^^^^`^^WUW````^^^``__^^^^`^^[[[_\\\\\\\UPPPPPMVXXZXQUUVVVZZ^\XUUUUUUYZUOOMMMWWUUUMMMMJMMWRRMMMNNNNNNRRWURLLLRPPRTUUULLLLLRMUUURQOSSXTTMOOOWWWWWTTOQQQKKKQKKKKKKKKKKOKKKPKKKKPRROPKKKPRPOO@KKPPLKKK +--- +SEQ_NAME: FQIBXOY01BWE7M +SEQ: TCAGTATGATGACGGCTAATGATGATGTAGAGGCGCCGAGTGACTTTGAAAAAATCAGAGCTGAAGTTTCATGGTAATAGATATTATCATTTTTGAATTAATTATATTAATGTGTTTAGCAATAGCACTGGAGGTGTTGTAAATATGTGGATTGTCATTTCAATTGTTTTATCTATATTTTTATTGATCTTGTTAAGTAGCATTTCTCATAAGATGAAAACCATAGAAGCATTGGAGTATATGAATG +SEQ_LEN: 247 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 242 +CLIP_ADAPTOR_LEFT: 0 
+CLIP_ADAPTOR_RIGHT: 0 +SCORES: eeeeeeeeeeeeedddeeeeeehhhhhhhhhhhhhhhhhhhhhhhhff__ff__eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeedddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeaafddddaaadddddddeddddddddd +--- +SEQ_NAME: FQIBXOY01A2RM8 +SEQ: TCAGAAACTAAAAAACTTAAAAAAGCATGCCAATCAGTACATCATAATTGCGTCTTGGGGACAGACAAATGATGAATAGAGATTGGCATGCTTTTTATTTTTGAATATAAATATTTAGTTCATGGCATTTCTAGTTACATGACGTCCATGAATTAAGAAGTAAACAAGCATAGTAATGATTGCTAAAGCGGCCATAAAGCCGAAGATTTCACTATATGAAAACATATGAGTAAATAACCCAAGGAATGATGGACCGAAGCCGAC +SEQ_LEN: 264 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 261 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: ```^OOO[ULLLLLLS[[IIIIII[[`````bbbccddcccddddddddbbbbbbbbbbb__^_^[TTT__`bbbb`^_^bbbbbbbbb``YY^^Y`JJJJJZZZ]]XXZ``[TTTZ``]bbb`___ZZZ_]bb````^__b_^_^^`````bbbbbbbbb```b`````bbbb``]]]bb`]YSRRR`````]QQQR]]]`]]]]`b``XXX]`XXXXYYVVMVSZZZXSNNNNKKNNVXXSSSV][VPPPYYYYYYYYYYYY +--- +SEQ_NAME: FQIBXOY01API7E +SEQ: TCAGCAATAGATATAATTTATGGTTTATATCTATTTCGGCATCTTTACCTTTCACTTGTTCAACTTATGTACCATAAATACTTCTGACAAGTTACTAATTAAACATGCAACCTCTAACTCAATTTAATATTTTAACTAACTTGTAATATACAGGATTCATCACGCATAATCAACCCTGTAAAACTTGATACGCAATAAAAGTTTTAAAGCATTTTATTGCGACAACTGTCTATCTATGTTTTTTCAAACGAATTTCATCAACTAGATTCCAGATAAATTC +SEQ_LEN: 280 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 278 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: eeeeeeeeeeeeeeeeeeeeeeggfffffhffffffffffefeeeefeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddeeeeeeeeeeeeeeeeedddeeeeeeeeee``\\\``eeddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeccccdddeeeeeedccccdeeeee```ceeee````WWceeeeeccccceedddddddddddddda\[[[^]]]adaaad_____________^^^_\\\\^___\\\\\\ +--- +SEQ_NAME: FQIBXOY01BJJRF +SEQ: TCAGTAACTATCAAATAAAATGATAACGGTTTCATCTATCTATTTTATCGGTCTAGTGGCTGATTTCAAGCTAGAAATATTGAATGACAATACAACTCTGTTAAAATGATGGACGTAGACAAATATGCGTATTGACGCTTTATTTTAAAAATTAACATGCTTATAACATGTTTATAGAAGGAGATTAACCTATGAACTATCAAGTTCTTTTATATTATAAATATATGACGATTGATGACCCTGAACAGTTTGCTCAGGATCAC +SEQ_LEN: 263 +CLIP_QUAL_LEFT: 4 
+CLIP_QUAL_RIGHT: 260 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: cccc```ccbbb___bXXXXbbcccegeeddffffffedddecdddccccbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccbbbccccccccbbbbccccccccccccccccccccccccccccccccccccccVVVVVVaccbbbcccccccccccccccccccccccccccccccccccaaaaccccccccccccaWRR\YVVVVVVYYYY +--- diff --git a/bp_test/out/read_sff.out.2 b/bp_test/out/read_sff.out.2 new file mode 100644 index 0000000..7591dc9 --- /dev/null +++ b/bp_test/out/read_sff.out.2 @@ -0,0 +1,9 @@ +SEQ_NAME: FQIBXOY01DRIMT +SEQ: TCAGTCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTTC +SEQ_LEN: 279 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 277 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: aaa`[[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMMM +--- diff --git a/bp_test/out/read_sff.out.3 b/bp_test/out/read_sff.out.3 new file mode 100644 index 0000000..a8e3bcf --- /dev/null +++ b/bp_test/out/read_sff.out.3 @@ -0,0 +1,9 @@ +SEQ_NAME: FQIBXOY01DRIMT +SEQ: tcagTCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTTc +SEQ_LEN: 279 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 277 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: 
aaa`[[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMMM +--- diff --git a/bp_test/out/read_sff.out.4 b/bp_test/out/read_sff.out.4 new file mode 100644 index 0000000..9809971 --- /dev/null +++ b/bp_test/out/read_sff.out.4 @@ -0,0 +1,9 @@ +SEQ_NAME: FQIBXOY01DRIMT +SEQ: TCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTT +SEQ_LEN: 274 +CLIP_QUAL_LEFT: 4 +CLIP_QUAL_RIGHT: 277 +CLIP_ADAPTOR_LEFT: 0 +CLIP_ADAPTOR_RIGHT: 0 +SCORES: [[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMM +--- diff --git a/bp_test/test/test_read_sff b/bp_test/test/test_read_sff new file mode 100755 index 0000000..3b05613 --- /dev/null +++ b/bp_test/test/test_read_sff @@ -0,0 +1,19 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -i $in -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -i $in -n 1 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -i $in -n 1 -m -O $tmp" +assert_no_diff $tmp $out.3 +clean + +run "$bp -i $in -n 1 -c -O $tmp" +assert_no_diff $tmp $out.4 +clean diff --git a/code_ruby/Maasha/lib/sff.rb b/code_ruby/Maasha/lib/sff.rb new file mode 100644 index 0000000..3e0fc37 --- /dev/null +++ b/code_ruby/Maasha/lib/sff.rb @@ -0,0 +1,225 @@ +# Copyright (C) 2011 Martin A. Hansen. 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This software is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Error class for all exceptions to do with SFF.
class SFFError < StandardError; end

# Class containing methods to parse SFF files:
# http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format#sff
#
# An SFF object wraps an IO positioned at the start of an SFF stream, parses
# the common header on construction and then yields one Read per read section
# through #each (Enumerable is mixed in).
class SFF
  include Enumerable

  # The four bytes ".sff" read as a big-endian unsigned 32 bit integer.
  MAGIC_NUMBER = 0x2E736666

  # Fixed-size leading part of the common header: magic_number (N),
  # version (C4), index_offset as two 32 bit halves (N2), index_length (N),
  # number_of_reads (N), header_length (n), key_length (n),
  # number_of_flows_per_read (n) and flowgram_format_code (C).
  HEADER_TEMPLATE = "NC4N2NNnnnC"
  HEADER_SIZE     = 31

  # Fixed-size leading part of a read header: read_header_length (n),
  # name_length (n), number_of_bases (N) and the four clip values (n each).
  READ_HEADER_TEMPLATE = "nnNnnnn"
  READ_HEADER_SIZE     = 16

  # Class method for opening SFF files.
  #
  # Arguments are passed on to File.open. With a block, the SFF object is
  # yielded and the file is closed when the block returns or raises. Without
  # a block the SFF object is returned and the caller must call #close.
  #
  # Raises SFFError if the header cannot be parsed; the file is closed
  # before the error propagates.
  def self.open(*args)
    ios = File.open(*args)
    ios.binmode # SFF is a binary format; avoid any newline/encoding translation.

    begin
      sff = new(ios)
    rescue StandardError
      ios.close # Do not leak the handle when the header is invalid.
      raise
    end

    return sff unless block_given?

    begin
      yield sff
    ensure
      ios.close
    end
  end

  # Method to initialize a SFF object along with
  # instance variables pertaining to the SFF header
  # section. The header is parsed immediately, so an
  # SFFError is raised here for malformed input.
  def initialize(io)
    @io                       = io
    @count                    = 0 # Number of reads parsed so far from this io.
    @magic_number             = 0
    @version                  = ""
    @index_offset             = 0
    @index_length             = 0
    @number_of_reads          = 0
    @header_length            = 0
    @key_length               = 0
    @number_of_flows_per_read = 0
    @flowgram_format_code     = 0
    @flow_chars               = ""
    @key_sequence             = ""
    @eight_byte_padding       = 0

    header_parse
  end

  # Method to close ios.
  def close
    @io.close
  end

  # Method to iterate over each SFF entry, yielding Read objects.
  # Returns an enumerator when called without a block, self otherwise.
  def each
    return to_enum(:each) unless block_given?

    while (read = read_parse)
      yield read
    end

    self # conventionally
  end

  private

  # Method that reads exactly +length+ bytes from the io.
  # Raises SFFError naming +what+ if the io ends before that.
  def read_exact(length, what)
    data = @io.read(length)

    if data.nil? || data.bytesize != length
      raise SFFError, "Truncated SFF file: could not read #{what}"
    end

    data
  end

  # Method to parse the SFF file's header section
  # and load the information into the instance variables.
  def header_parse
    bits_in_uint = 32

    data = read_exact(HEADER_SIZE, "header").unpack(HEADER_TEMPLATE)

    @magic_number             = data[0]
    @version                  = data[1..4].join("")
    @index_offset             = (data[5] << bits_in_uint) | data[6]
    @index_length             = data[7]
    @number_of_reads          = data[8]
    @header_length            = data[9]
    @key_length               = data[10]
    @number_of_flows_per_read = data[11]
    @flowgram_format_code     = data[12]

    # Validate before trusting any length field read from the header.
    check_magic_number
    check_version

    @flow_chars   = read_exact(@number_of_flows_per_read, "flow chars").unpack("A*").join("")
    @key_sequence = read_exact(@key_length, "key sequence").unpack("A*").join("")

    fast_forward

    check_header_length
  end

  # Method that skips the eight_byte_padding field found at the end of a
  # section, i.e. moves the file read pointer forward to the next position
  # divisible by 8. Does nothing if the pointer is already aligned.
  def fast_forward
    padding = (8 - @io.pos % 8) % 8

    @io.read(padding) unless padding.zero?
  end

  # Method to parse a read section of a SFF file.
  # Returns a Read, or nil when number_of_reads entries have been parsed.
  def read_parse
    return nil if @count >= @number_of_reads

    data = read_exact(READ_HEADER_SIZE, "read header").unpack(READ_HEADER_TEMPLATE)

    read = Read.new

    read.read_header_length = data[0]
    read.name_length        = data[1]
    read.number_of_bases    = data[2]
    read.clip_qual_left     = data[3]
    read.clip_qual_right    = data[4]
    read.clip_adapter_left  = data[5]
    read.clip_adaptor_right = data[6]
    read.name               = read_exact(read.name_length, "read name").unpack("A*").join("")

    fast_forward

    read_exact(2 * @number_of_flows_per_read, "flowgram values") # skip through flowgram_values
    read_exact(read.number_of_bases, "flow index per base")      # skip through flow_index_per_base

    # NB! Parsing of flowgram_values and flow_index_per_base is currently disabled since these are not needed.
    # read.flowgram_values = @io.read(2 * @number_of_flows_per_read).unpack("n*").map { |val| val = sprintf("%.2f", val * 0.01) }
    # flow_index_per_base = @io.read(read.number_of_bases).unpack("C*")
    # (1 ... flow_index_per_base.length).each { |i| flow_index_per_base[i] += flow_index_per_base[i - 1] }
    # read.flow_index_per_base = flow_index_per_base

    read.bases          = read_exact(read.number_of_bases, "bases").unpack("A*").join("")
    read.quality_scores = read_exact(read.number_of_bases, "quality scores").unpack("C*")

    fast_forward

    @count += 1

    read
  end

  # Method to check the magic number of a SFF file.
  # Raises an error if the magic number doesn't match.
  def check_magic_number
    raise SFFError, "Badly formatted SFF file." unless @magic_number == MAGIC_NUMBER
  end

  # Method to check the version number of a SFF file.
  # Raises an error if the version doesn't match.
  def check_version
    raise SFFError, "Wrong version #{@version}" unless @version.to_i == 1
  end

  # Method to check the header length of a SFF file.
  # Raises an error if the header length doesn't match
  # the file position after reading the header section.
  def check_header_length
    raise SFFError, "Bad header length: #{@header_length}" unless @io.pos == @header_length
  end
end

# Class containing data accessor methods for an SFF entry and methods
# for manipulating this entry.
#
# flowgram_values and flow_index_per_base are never set by SFF (those
# sections are skipped), so they stay nil unless assigned by the caller.
class Read
  attr_accessor :read_header_length, :name_length, :number_of_bases,
    :clip_qual_left, :clip_qual_right, :clip_adapter_left, :clip_adaptor_right,
    :name, :flowgram_values, :flow_index_per_base, :bases, :quality_scores

  # Method that converts a Read object's data to a Biopiece record (a hash).
  # The quality clip positions are reported minus one; quality scores are
  # encoded as one character per base with an offset of 64.
  def to_bp
    {
      :SEQ_NAME           => name,
      :SEQ                => bases,
      :SEQ_LEN            => bases.length,
      :CLIP_QUAL_LEFT     => clip_qual_left - 1,
      :CLIP_QUAL_RIGHT    => clip_qual_right - 1,
      :CLIP_ADAPTOR_LEFT  => clip_adapter_left,
      :CLIP_ADAPTOR_RIGHT => clip_adaptor_right,
      :SCORES             => quality_scores.map { |score| (score + 64).chr }.join("")
    }
  end

  # Method that soft masks the sequence (i.e. lowercases sequence) according to
  # clip_qual_left and clip_qual_right information. Returns the masked sequence.
  def mask
    keep = clip_range

    left   = bases[0...keep.begin].downcase
    middle = bases[keep]
    right  = bases[keep.end...bases.length].downcase

    self.bases = left + middle + right
  end

  # Method that clips sequence (i.e. trims) according to
  # clip_qual_left and clip_qual_right information. Both the bases and the
  # quality scores are trimmed; the clip fields themselves are left unchanged.
  def clip
    keep = clip_range # Computed once, before bases is replaced.

    self.bases          = bases[keep]
    self.quality_scores = quality_scores[keep]
  end

  private

  # Method that returns the zero-based, end-exclusive range of bases to keep.
  # clip_qual_left/right are 1-based and inclusive; a value of 0 means that
  # no clip point was set on that side, in which case nothing is removed there.
  def clip_range
    first = clip_qual_left  > 0 ? clip_qual_left - 1 : 0
    last  = clip_qual_right > 0 ? clip_qual_right    : bases.length

    first...last
  end
end