--- /dev/null
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Read SFF entries from one or more files.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+require 'biopieces'
+require 'sff'
+
+# Option definitions handed to Biopieces#parse: input files (data_in), the
+# maximum number of entries to emit (num), and the mask / clip flags that
+# trigger Read#mask and Read#clip on each entry.
+casts = []
+casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'mask', :short=>'m', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'clip', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+# Emit every record yielded by bp.each_record unchanged before any SFF
+# entries are written.
+bp.each_record do |record|
+  bp.puts record
+end
+
+num  = 0     # Number of SFF entries emitted so far, counted across all files.
+last = false # Set when the num limit is hit so the outer file loop stops too.
+
+if options.has_key?(:data_in)
+  options[:data_in].each do |file|
+    # SFF is parsed with String#unpack as a binary format, so open the file
+    # in binary mode. (The original `mode='r'` also created a stray local.)
+    SFF.open(file, 'rb') do |sff|
+      sff.each do |entry|
+        entry.mask if options[:mask]
+        entry.clip if options[:clip]
+        bp.puts entry.to_bp
+        num += 1
+
+        if options.has_key?(:num) && options[:num] == num
+          last = true
+          break
+        end
+      end
+    end
+
+    break if last
+  end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
--- /dev/null
+SEQ_NAME: FQIBXOY01DRIMT
+SEQ: TCAGTCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTTC
+SEQ_LEN: 279
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 277
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: aaa`[[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMMM
+---
+SEQ_NAME: FQIBXOY01AV4UR
+SEQ: TCAGCTTGAGCAAATTCTTTATCTTTAAAATTAAACATTTTGTTGAAATTACTGTATCTTTAAAACTTAGATTCAATCGCTTCTTTTATTCTCTTCTGATGACACTCCTACTTGATTCGCAATAACTCAATCCAAACGACCAACCAATGTCAGCTAATTCATCAAGTTGTACGTCTAACGGCTTACCTGGTTTCTTCTTCCAGTTCTTGAACGTTTCCTAATGTGTTAAACCAACTTCTAAAGAAATTCAACCACATACGCAATCTTGCTATCTCGTAAATTTAAGTTG
+SEQ_LEN: 289
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 69
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: ```````````UUU\\`[[[]Wa]XXMMMMKKIIIRKMMMMRIIYY[[QQNSQ[[^^\]]][[XXRNNMXYZ\ZZZ[[XWXX[[[\[^JJRW\\WVVVV\\ZUUUUUUOOOMMMMVZSSRSSPPPPYXXXWVVWMMMSKMMMMMMMMMSVVVXXXSUOOOO\\UUUSOOOUUWWURSSUSMMMMMRRMMMUUMLIIRPTRRLLLLOOSRRRWWUUUOOMOOOSTTTTQQQKKIIKKKQKKKKKKMMKKKKKKKKKRUUUUUUVVVVVUTTUUURRPOKKKMMMKHKHHR
+---
+SEQ_NAME: FQIBXOY01CU7IT
+SEQ: TCAGTTCGTAAAAGTGTGATAGATGATGGCAGATGTTATCTCTGTCCGTGTCTAGGCTATCCAAGACAATGGCGTTCAGAAGATATTTACCAGGAAATAAATGAGACGATACAAATAATAGAAATTTAAAATGCGCAAACCTGACCCAGTTTGCGCATTTTATGTTTTACACACGCGAGTAATGTGTTTACTTACGTGTGTTTATTTTGTTGCTGATTTTCAATTGTATATGAATGTGGTTGCACATAAATGCACTTTC
+SEQ_LEN: 259
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 257
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: eeeeeeeeeeeeeeeeeeeeeehhhghffgghhhhgghhhhhhhgffhheeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeaaadeeeda]]]ddeeeeeeeeeeeeeeeeeedddedd]]]]dddeeeeeeeeeeeeeeeeedddeeedddddeeeeeeeeeeeeeeeeeeeeeedd\\\ddddeeeeeeeddddddddeeeeeeeeeeeedddddddedddddddddddaaadddddVVRY__]YPPMO
+---
+SEQ_NAME: FQIBXOY01CEZSI
+SEQ: TCAGAGGTTATGACGTTAAAGCTATTGATGGTCATTCGAACATAACAGAAGCAAGTTTGAAAAGTTCCAAAATATTTGTAATTCCTGAGGCTAACATTCCTTTCAAAGAATCAGAACAGGCAGCAATTGTTAAATATGTGAAACAAGGTGGCAATGTTGTCTTTATTTCAGATCATTACAATGCTGACCGAAATTTAAATCGTATTGATTCATCGGAGGCAATGAATGGTTATCGACGTGGAGCATATGAAGATATG
+SEQ_LEN: 257
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 253
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: eeeeeeeeeeeeeeeeedddddfhhfffffhfhhhhhhhhhhhhhhhhfeeeeeeddcccceeeeeeeeeeeeecccc```ddeeeedddddddeeddddeedd```eeeeeeeeeeeeeeeeeeeeeeeecccddeeee\\\deeeeeeeeeeeeeeeeeeeeeeeeeccceeeeeeeeeeeeeeeeed\\\ccceeeeeeeeeeeedddeeeeeeeeedddddddeefeddddddddddaaaddddd____^YYY
+---
+SEQ_NAME: FQIBXOY01C4ETH
+SEQ: TCAGTGAAACAAACACGCAACAATCATTTGCTAATTGTAAGCAACTTAGACAAGTATATCCGAATGGTGTCACTGCCGATCATCCAGCATATCGACCACATTTAGATAGAGATAAAGATAAACGTGCATGTGAACCTGATAAATATTAAACAACAAGCGAATTGAATTCAAATTGTATTTAGCTTTATGCACTAATCACATAGTAAATAATGAGGGAGATTTTTTAGGCATGAGCAATCAATTCAAAAG
+SEQ_LEN: 249
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 248
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: eeeeeeeeeeeeeeeeeeeeeehgffaaahheggeddddddddegggfgeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddeeeeeeeeeeeeeeeeeedddeeedddddeeeeeeeeeeeeeeeeeeeeeeedd\\\adeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee`_____aaaddaaadddddd[[[[dZZZZ^
+---
+SEQ_NAME: FQIBXOY01BXD7A
+SEQ: TCAGAAAAGCTTTATAATTTTATAATTGCTAAATCTTTTCAACAACCAGTTGGAAGTACGTTCACTTATGGTGAATTAAGAAAGAAATATAATGTGGTTTGTAGCACGAATGATCAACGTGAAGTAGGACGTCGTTTTGCTTACTGGATTAAGTACACGCCAGGATTACCATTTAAAATGTAGGAACAAAAAATGGCAGTTATTATATCAGAAATAGGGATAAACCCATGTCAATAATAGCACGCCTTCGAAAGGAGGTGATTGCTAATGAGTTACACNTTAGTTTG
+SEQ_LEN: 287
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 177
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: ]]]XRRRR^^[[[^[^^^^^^[PTRWW```^^^WYW^^_^^]\XXVWWYXXWWXX\a\^^[[[\^^^^\]]\\XXXXVVXPPPVUWW]Z\\\^^^^^`^^WUW````^^^``__^^^^`^^[[[_\\\\\\\UPPPPPMVXXZXQUUVVVZZ^\XUUUUUUYZUOOMMMWWUUUMMMMJMMWRRMMMNNNNNNRRWURLLLRPPRTUUULLLLLRMUUURQOSSXTTMOOOWWWWWTTOQQQKKKQKKKKKKKKKKOKKKPKKKKPRROPKKKPRPOO@KKPPLKKK
+---
+SEQ_NAME: FQIBXOY01BWE7M
+SEQ: TCAGTATGATGACGGCTAATGATGATGTAGAGGCGCCGAGTGACTTTGAAAAAATCAGAGCTGAAGTTTCATGGTAATAGATATTATCATTTTTGAATTAATTATATTAATGTGTTTAGCAATAGCACTGGAGGTGTTGTAAATATGTGGATTGTCATTTCAATTGTTTTATCTATATTTTTATTGATCTTGTTAAGTAGCATTTCTCATAAGATGAAAACCATAGAAGCATTGGAGTATATGAATG
+SEQ_LEN: 247
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 242
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: eeeeeeeeeeeeedddeeeeeehhhhhhhhhhhhhhhhhhhhhhhhff__ff__eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeedddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeaafddddaaadddddddeddddddddd
+---
+SEQ_NAME: FQIBXOY01A2RM8
+SEQ: TCAGAAACTAAAAAACTTAAAAAAGCATGCCAATCAGTACATCATAATTGCGTCTTGGGGACAGACAAATGATGAATAGAGATTGGCATGCTTTTTATTTTTGAATATAAATATTTAGTTCATGGCATTTCTAGTTACATGACGTCCATGAATTAAGAAGTAAACAAGCATAGTAATGATTGCTAAAGCGGCCATAAAGCCGAAGATTTCACTATATGAAAACATATGAGTAAATAACCCAAGGAATGATGGACCGAAGCCGAC
+SEQ_LEN: 264
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 261
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: ```^OOO[ULLLLLLS[[IIIIII[[`````bbbccddcccddddddddbbbbbbbbbbb__^_^[TTT__`bbbb`^_^bbbbbbbbb``YY^^Y`JJJJJZZZ]]XXZ``[TTTZ``]bbb`___ZZZ_]bb````^__b_^_^^`````bbbbbbbbb```b`````bbbb``]]]bb`]YSRRR`````]QQQR]]]`]]]]`b``XXX]`XXXXYYVVMVSZZZXSNNNNKKNNVXXSSSV][VPPPYYYYYYYYYYYY
+---
+SEQ_NAME: FQIBXOY01API7E
+SEQ: TCAGCAATAGATATAATTTATGGTTTATATCTATTTCGGCATCTTTACCTTTCACTTGTTCAACTTATGTACCATAAATACTTCTGACAAGTTACTAATTAAACATGCAACCTCTAACTCAATTTAATATTTTAACTAACTTGTAATATACAGGATTCATCACGCATAATCAACCCTGTAAAACTTGATACGCAATAAAAGTTTTAAAGCATTTTATTGCGACAACTGTCTATCTATGTTTTTTCAAACGAATTTCATCAACTAGATTCCAGATAAATTC
+SEQ_LEN: 280
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 278
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: eeeeeeeeeeeeeeeeeeeeeeggfffffhffffffffffefeeeefeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeddddddeeeeeeeeeeeeeeeeedddeeeeeeeeee``\\\``eeddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeccccdddeeeeeedccccdeeeee```ceeee````WWceeeeeccccceedddddddddddddda\[[[^]]]adaaad_____________^^^_\\\\^___\\\\\\
+---
+SEQ_NAME: FQIBXOY01BJJRF
+SEQ: TCAGTAACTATCAAATAAAATGATAACGGTTTCATCTATCTATTTTATCGGTCTAGTGGCTGATTTCAAGCTAGAAATATTGAATGACAATACAACTCTGTTAAAATGATGGACGTAGACAAATATGCGTATTGACGCTTTATTTTAAAAATTAACATGCTTATAACATGTTTATAGAAGGAGATTAACCTATGAACTATCAAGTTCTTTTATATTATAAATATATGACGATTGATGACCCTGAACAGTTTGCTCAGGATCAC
+SEQ_LEN: 263
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 260
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: cccc```ccbbb___bXXXXbbcccegeeddffffffedddecdddccccbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccbbbccccccccbbbbccccccccccccccccccccccccccccccccccccccVVVVVVaccbbbcccccccccccccccccccccccccccccccccccaaaaccccccccccccaWRR\YVVVVVVYYYY
+---
--- /dev/null
+SEQ_NAME: FQIBXOY01DRIMT
+SEQ: TCAGTCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTTC
+SEQ_LEN: 279
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 277
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: aaa`[[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMMM
+---
--- /dev/null
+SEQ_NAME: FQIBXOY01DRIMT
+SEQ: tcagTCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTTc
+SEQ_LEN: 279
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 277
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: aaa`[[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMMM
+---
--- /dev/null
+SEQ_NAME: FQIBXOY01DRIMT
+SEQ: TCATATTTTTTAGAAACATGTTTGTTTGGACTCATTAATTCATGATTAAAATCACCATCATTCGTTATCAATAAAAGCCCTTCTGTATCTTTATCAAGACGACCAACCGGAAAAATATTTAGATGTTGGTATTCAGGTATTAAATCAATAACGGTTTTTGAATGATGATCTTCAGTTGCTGATATATAACCTTTTGGCTTATTTAACATAATATAGACATTTTCAATGTATTCTATTAATTCTCCACGAACTGTTATCTTATCGTTTTCTGGTT
+SEQ_LEN: 274
+CLIP_QUAL_LEFT: 4
+CLIP_QUAL_RIGHT: 277
+CLIP_ADAPTOR_LEFT: 0
+CLIP_ADAPTOR_RIGHT: 0
+SCORES: [[[_[NNNNNNTUP[[__`abbcccddddeeeeeeedddcdcccc``bbbbaaaba_`````bbbbba````____\\\\``_aabbbbbbbba````_WVV\\bbbb``\\`^_bXbbb_`_``bbbbbaaabbb___aabbaaa[[UUUbbbZZZZZabbbbbcaa[[UUU[[[[aabbbac____aPPNNNNPP]PPPWWabaaaabbbbbbbaaabbbbbaaabbbXXX__XXXXXXXXXXUYYUUUY[UUUYUUUUYXXMMMMRKMMMM
+---
--- /dev/null
+#!/bin/bash
+
+# Regression test driver: each stanza runs the command under test with a
+# different option set, compares the result written to $tmp against an
+# expected-output file ($out.N), and then cleans up.
+
+# run, assert_no_diff and clean are not defined in this script; presumably
+# they, and $bp, $in, $out and $tmp, come from test.sh -- TODO confirm.
+source "$BP_DIR/bp_test/lib/test.sh"
+
+# Case 1: input file only, no further options.
+run "$bp -i $in -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
+# Case 2: -n 1 (presumably limits output to the first entry).
+run "$bp -i $in -n 1 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+# Case 3: -n 1 together with -m (presumably the mask flag).
+run "$bp -i $in -n 1 -m -O $tmp"
+assert_no_diff $tmp $out.3
+clean
+
+# Case 4: -n 1 together with -c (presumably the clip flag).
+run "$bp -i $in -n 1 -c -O $tmp"
+assert_no_diff $tmp $out.4
+clean
--- /dev/null
+# Copyright (C) 2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Exception type used for all SFF related errors.
+class SFFError < StandardError
+end
+
+# Class containing methods to parse SFF files:
+# http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format#sff
+#
+# The common header is parsed when the object is created; read entries are
+# parsed one at a time while iterating with #each. The underlying io is only
+# ever read forward, so the entries can be iterated once per object.
+class SFF
+  include Enumerable
+
+  # Magic number expected at the start of the header: the bytes ".sff" read
+  # as a big-endian 32 bit unsigned integer (779314790).
+  MAGIC_NUMBER = 0x2E736666
+
+  # Byte sizes of the fixed-length parts of the common header and of each
+  # read header; they match the unpack templates used below.
+  HEADER_FIXED_SIZE      = 31
+  READ_HEADER_FIXED_SIZE = 16
+
+  # Class method for opening SFF files. All arguments are passed on to
+  # File.open.
+  #
+  # With a block, the SFF object is yielded, the file is closed when the
+  # block returns or raises, and the block's value is returned. Without a
+  # block the SFF object is returned and the caller must call #close.
+  #
+  # Raises SFFError if the header is invalid; the file is closed in that
+  # case as well, so a bad file never leaks a file handle.
+  def self.open(*args)
+    ios = File.open(*args)
+    sff = nil
+
+    begin
+      sff = new(ios)
+
+      return sff unless block_given?
+
+      yield sff
+    ensure
+      # Keep the file open only when a valid SFF object is handed back to a
+      # caller that did not supply a block.
+      ios.close if (block_given? || sff.nil?) && !ios.closed?
+    end
+  end
+
+  # Method to initialize a SFF object along with
+  # instance variables pertaining to the SFF header
+  # section. The header is read from +io+ immediately.
+  #
+  # Raises SFFError if the header is truncated or invalid.
+  def initialize(io)
+    @io                       = io
+    @count                    = 0 # Reads parsed so far from this io.
+    @magic_number             = 0
+    @version                  = ""
+    @index_offset             = 0
+    @index_length             = 0
+    @number_of_reads          = 0
+    @header_length            = 0
+    @key_length               = 0
+    @number_of_flows_per_read = 0
+    @flowgram_format_code     = 0
+    @flow_chars               = ""
+    @key_sequence             = ""
+    @eight_byte_padding       = 0
+
+    header_parse
+  end
+
+  # Method to close ios.
+  def close
+    @io.close
+  end
+
+  # Method to iterate over each SFF entry, yielding one Read object per
+  # entry. Returns self, or an Enumerator when no block is given.
+  #
+  # Raises SFFError if the file ends in the middle of an entry.
+  def each
+    return to_enum(:each) unless block_given?
+
+    while (read = read_parse)
+      yield read
+    end
+
+    self # conventionally
+  end
+
+  private
+
+  # Method to parse the SFF file's header section
+  # and load the information into the instance variables.
+  def header_parse
+    data = read_bytes(HEADER_FIXED_SIZE).unpack("NC4N2NNnnnC")
+
+    @magic_number             = data[0]
+    @version                  = data[1..4].join("")
+    @index_offset             = (data[5] << 32) | data[6] # Two uint32 halves of one uint64.
+    @index_length             = data[7]
+    @number_of_reads          = data[8]
+    @header_length            = data[9]
+    @key_length               = data[10]
+    @number_of_flows_per_read = data[11]
+    @flowgram_format_code     = data[12]
+
+    # Validate before trusting the length fields, so that a non-SFF file is
+    # reported as such instead of failing while reading garbage lengths.
+    check_magic_number
+    check_version
+
+    @flow_chars   = read_bytes(@number_of_flows_per_read).unpack("A*").first
+    @key_sequence = read_bytes(@key_length).unpack("A*").first
+
+    fast_forward
+
+    check_header_length
+  end
+
+  # Method that moves the file read pointer past the padding found at the
+  # end of a section, so that the next section starts at a file position
+  # divisible by 8.
+  def fast_forward
+    padding = -@io.pos % 8
+
+    @io.read(padding) unless padding.zero?
+  end
+
+  # Method to parse a read section of a SFF file. Returns a Read object, or
+  # nil once the number of reads stated in the header has been parsed.
+  def read_parse
+    return nil if @count >= @number_of_reads
+
+    data = read_bytes(READ_HEADER_FIXED_SIZE).unpack("nnNnnnn")
+
+    read = Read.new
+
+    read.read_header_length = data[0]
+    read.name_length        = data[1]
+    read.number_of_bases    = data[2]
+    read.clip_qual_left     = data[3]
+    read.clip_qual_right    = data[4]
+    read.clip_adapter_left  = data[5]
+    read.clip_adaptor_right = data[6]
+    read.name               = read_bytes(read.name_length).unpack("A*").first
+
+    fast_forward
+
+    read_bytes(2 * @number_of_flows_per_read) # skip through flowgram_values
+    read_bytes(read.number_of_bases)          # skip through flow_index_per_base
+
+    # NB! Parsing of flowgram_values and flow_index_per_base is currently disabled since these are not needed.
+    # read.flowgram_values = @io.read(2 * @number_of_flows_per_read).unpack("n*").map { |val| val = sprintf("%.2f", val * 0.01) }
+    # flow_index_per_base = @io.read(read.number_of_bases).unpack("C*")
+    # (1 ... flow_index_per_base.length).each { |i| flow_index_per_base[i] += flow_index_per_base[i - 1] }
+    # read.flow_index_per_base = flow_index_per_base
+
+    read.bases          = read_bytes(read.number_of_bases).unpack("A*").first
+    read.quality_scores = read_bytes(read.number_of_bases).unpack("C*")
+
+    fast_forward
+
+    @count += 1
+
+    read
+  end
+
+  # Method that reads exactly +length+ bytes from the io.
+  # Raises SFFError if the file ends before that many bytes are available.
+  def read_bytes(length)
+    data = @io.read(length)
+
+    raise SFFError, "Truncated SFF file." if data.nil? || data.bytesize < length
+
+    data
+  end
+
+  # Method to check the magic number of a SFF file.
+  # Raises an error if the magic number doesn't match.
+  def check_magic_number
+    raise SFFError, "Badly formatted SFF file." unless @magic_number == MAGIC_NUMBER
+  end
+
+  # Method to check the version number of a SFF file.
+  # Raises an error if the version doesn't match.
+  def check_version
+    raise SFFError, "Wrong version #{@version}" unless @version.to_i == 1
+  end
+
+  # Method to check the header length of a SFF file.
+  # Raises an error if the header length doesn't match
+  # the file position after reading the header section.
+  def check_header_length
+    raise SFFError, "Bad header length: #{@header_length}" unless @io.pos == @header_length
+  end
+end
+
+# Class containing data accessor methods for an SFF entry and methods
+# for manipulating this entry.
+#
+# mask and clip treat clip_qual_left as the 1-based position of the first
+# base to keep and clip_qual_right as the 1-based position of the last base
+# to keep. A value of 0 means "no clipping" on that side.
+class Read
+  attr_accessor :read_header_length, :name_length, :number_of_bases,
+    :clip_qual_left, :clip_qual_right, :clip_adapter_left, :clip_adaptor_right,
+    :name, :flowgram_values, :flow_index_per_base, :bases, :quality_scores
+
+  # Method that converts a Read object's data to a Biopiece record (a hash).
+  # The quality clip positions are reported as the stored value minus one,
+  # and the quality scores are encoded as one character per base, with
+  # character code score + 64.
+  def to_bp
+    {
+      :SEQ_NAME           => name,
+      :SEQ                => bases,
+      :SEQ_LEN            => bases.length,
+      :CLIP_QUAL_LEFT     => clip_qual_left - 1,
+      :CLIP_QUAL_RIGHT    => clip_qual_right - 1,
+      :CLIP_ADAPTOR_LEFT  => clip_adapter_left,
+      :CLIP_ADAPTOR_RIGHT => clip_adaptor_right,
+      :SCORES             => quality_scores.map { |score| (score + 64).chr }.join("")
+    }
+  end
+
+  # Method that soft masks the sequence (i.e. lowercases sequence) according to
+  # clip_qual_left and clip_qual_right information. The sequence length is
+  # unchanged. Returns the masked sequence.
+  def mask
+    first, last = clip_bounds
+
+    self.bases = bases[0...first].downcase + bases[first...last] + bases[last...bases.length].downcase
+  end
+
+  # Method that clips sequence (i.e. trims) according to
+  # clip_qual_left and clip_qual_right information. Sequence and quality
+  # scores are trimmed alike. Returns the clipped quality scores.
+  def clip
+    first, last = clip_bounds
+
+    self.bases          = bases[first...last]
+    self.quality_scores = quality_scores[first...last]
+  end
+
+  private
+
+  # Method that returns the 0-based, end-exclusive [first, last] bounds of
+  # the part of the sequence that lies inside the quality clip positions.
+  # A clip value of 0 means no clipping on that side, and both bounds are
+  # kept within the sequence so that slicing never returns nil.
+  def clip_bounds
+    length = bases.length
+    first  = [[clip_qual_left - 1, 0].max, length].min
+    last   = clip_qual_right.zero? ? length : [clip_qual_right, length].min
+
+    [first, [last, first].max]
+  end
+end
+
+__END__
+